1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * net/sched/sch_api.c Packet scheduler API.
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6*4882a593Smuzhiyun *
7*4882a593Smuzhiyun * Fixes:
8*4882a593Smuzhiyun *
9*4882a593Smuzhiyun * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
10*4882a593Smuzhiyun * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
11*4882a593Smuzhiyun * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
12*4882a593Smuzhiyun */
13*4882a593Smuzhiyun
14*4882a593Smuzhiyun #include <linux/module.h>
15*4882a593Smuzhiyun #include <linux/types.h>
16*4882a593Smuzhiyun #include <linux/kernel.h>
17*4882a593Smuzhiyun #include <linux/string.h>
18*4882a593Smuzhiyun #include <linux/errno.h>
19*4882a593Smuzhiyun #include <linux/skbuff.h>
20*4882a593Smuzhiyun #include <linux/init.h>
21*4882a593Smuzhiyun #include <linux/proc_fs.h>
22*4882a593Smuzhiyun #include <linux/seq_file.h>
23*4882a593Smuzhiyun #include <linux/kmod.h>
24*4882a593Smuzhiyun #include <linux/list.h>
25*4882a593Smuzhiyun #include <linux/hrtimer.h>
26*4882a593Smuzhiyun #include <linux/slab.h>
27*4882a593Smuzhiyun #include <linux/hashtable.h>
28*4882a593Smuzhiyun
29*4882a593Smuzhiyun #include <net/net_namespace.h>
30*4882a593Smuzhiyun #include <net/sock.h>
31*4882a593Smuzhiyun #include <net/netlink.h>
32*4882a593Smuzhiyun #include <net/pkt_sched.h>
33*4882a593Smuzhiyun #include <net/pkt_cls.h>
34*4882a593Smuzhiyun
35*4882a593Smuzhiyun #include <trace/events/qdisc.h>
36*4882a593Smuzhiyun
37*4882a593Smuzhiyun /*
38*4882a593Smuzhiyun
39*4882a593Smuzhiyun Short review.
40*4882a593Smuzhiyun -------------
41*4882a593Smuzhiyun
42*4882a593Smuzhiyun This file consists of two interrelated parts:
43*4882a593Smuzhiyun
44*4882a593Smuzhiyun 1. queueing disciplines manager frontend.
45*4882a593Smuzhiyun 2. traffic classes manager frontend.
46*4882a593Smuzhiyun
47*4882a593Smuzhiyun Generally, queueing discipline ("qdisc") is a black box,
48*4882a593Smuzhiyun which is able to enqueue packets and to dequeue them (when
49*4882a593Smuzhiyun device is ready to send something) in order and at times
50*4882a593Smuzhiyun determined by algorithm hidden in it.
51*4882a593Smuzhiyun
52*4882a593Smuzhiyun qdisc's are divided to two categories:
53*4882a593Smuzhiyun - "queues", which have no internal structure visible from outside.
54*4882a593Smuzhiyun - "schedulers", which split all the packets to "traffic classes",
55*4882a593Smuzhiyun using "packet classifiers" (look at cls_api.c)
56*4882a593Smuzhiyun
57*4882a593Smuzhiyun In turn, classes may have child qdiscs (as rule, queues)
58*4882a593Smuzhiyun attached to them etc. etc. etc.
59*4882a593Smuzhiyun
60*4882a593Smuzhiyun The goal of the routines in this file is to translate
61*4882a593Smuzhiyun information supplied by user in the form of handles
62*4882a593Smuzhiyun to more intelligible for kernel form, to make some sanity
63*4882a593Smuzhiyun checks and part of work, which is common to all qdiscs
64*4882a593Smuzhiyun and to provide rtnetlink notifications.
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun All real intelligent work is done inside qdisc modules.
67*4882a593Smuzhiyun
68*4882a593Smuzhiyun
69*4882a593Smuzhiyun
70*4882a593Smuzhiyun Every discipline has two major routines: enqueue and dequeue.
71*4882a593Smuzhiyun
72*4882a593Smuzhiyun ---dequeue
73*4882a593Smuzhiyun
74*4882a593Smuzhiyun dequeue usually returns a skb to send. It is allowed to return NULL,
75*4882a593Smuzhiyun but it does not mean that queue is empty, it just means that
76*4882a593Smuzhiyun discipline does not want to send anything this time.
77*4882a593Smuzhiyun Queue is really empty if q->q.qlen == 0.
78*4882a593Smuzhiyun For complicated disciplines with multiple queues q->q is not
79*4882a593Smuzhiyun real packet queue, but however q->q.qlen must be valid.
80*4882a593Smuzhiyun
81*4882a593Smuzhiyun ---enqueue
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun enqueue returns 0, if packet was enqueued successfully.
84*4882a593Smuzhiyun If packet (this one or another one) was dropped, it returns
85*4882a593Smuzhiyun not zero error code.
86*4882a593Smuzhiyun NET_XMIT_DROP - this packet dropped
87*4882a593Smuzhiyun Expected action: do not backoff, but wait until queue will clear.
88*4882a593Smuzhiyun NET_XMIT_CN - probably this packet enqueued, but another one dropped.
89*4882a593Smuzhiyun Expected action: backoff or ignore
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun Auxiliary routines:
92*4882a593Smuzhiyun
93*4882a593Smuzhiyun ---peek
94*4882a593Smuzhiyun
95*4882a593Smuzhiyun like dequeue but without removing a packet from the queue
96*4882a593Smuzhiyun
97*4882a593Smuzhiyun ---reset
98*4882a593Smuzhiyun
99*4882a593Smuzhiyun returns qdisc to initial state: purge all buffers, clear all
100*4882a593Smuzhiyun timers, counters (except for statistics) etc.
101*4882a593Smuzhiyun
102*4882a593Smuzhiyun ---init
103*4882a593Smuzhiyun
104*4882a593Smuzhiyun initializes newly created qdisc.
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun ---destroy
107*4882a593Smuzhiyun
108*4882a593Smuzhiyun destroys resources allocated by init and during lifetime of qdisc.
109*4882a593Smuzhiyun
110*4882a593Smuzhiyun ---change
111*4882a593Smuzhiyun
112*4882a593Smuzhiyun changes qdisc parameters.
113*4882a593Smuzhiyun */
114*4882a593Smuzhiyun
115*4882a593Smuzhiyun /* Protects list of registered TC modules. It is pure SMP lock. */
116*4882a593Smuzhiyun static DEFINE_RWLOCK(qdisc_mod_lock);
117*4882a593Smuzhiyun
118*4882a593Smuzhiyun
119*4882a593Smuzhiyun /************************************************
120*4882a593Smuzhiyun * Queueing disciplines manipulation. *
121*4882a593Smuzhiyun ************************************************/
122*4882a593Smuzhiyun
123*4882a593Smuzhiyun
124*4882a593Smuzhiyun /* The list of all installed queueing disciplines. */
125*4882a593Smuzhiyun
126*4882a593Smuzhiyun static struct Qdisc_ops *qdisc_base;
127*4882a593Smuzhiyun
128*4882a593Smuzhiyun /* Register/unregister queueing discipline */
129*4882a593Smuzhiyun
register_qdisc(struct Qdisc_ops * qops)130*4882a593Smuzhiyun int register_qdisc(struct Qdisc_ops *qops)
131*4882a593Smuzhiyun {
132*4882a593Smuzhiyun struct Qdisc_ops *q, **qp;
133*4882a593Smuzhiyun int rc = -EEXIST;
134*4882a593Smuzhiyun
135*4882a593Smuzhiyun write_lock(&qdisc_mod_lock);
136*4882a593Smuzhiyun for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
137*4882a593Smuzhiyun if (!strcmp(qops->id, q->id))
138*4882a593Smuzhiyun goto out;
139*4882a593Smuzhiyun
140*4882a593Smuzhiyun if (qops->enqueue == NULL)
141*4882a593Smuzhiyun qops->enqueue = noop_qdisc_ops.enqueue;
142*4882a593Smuzhiyun if (qops->peek == NULL) {
143*4882a593Smuzhiyun if (qops->dequeue == NULL)
144*4882a593Smuzhiyun qops->peek = noop_qdisc_ops.peek;
145*4882a593Smuzhiyun else
146*4882a593Smuzhiyun goto out_einval;
147*4882a593Smuzhiyun }
148*4882a593Smuzhiyun if (qops->dequeue == NULL)
149*4882a593Smuzhiyun qops->dequeue = noop_qdisc_ops.dequeue;
150*4882a593Smuzhiyun
151*4882a593Smuzhiyun if (qops->cl_ops) {
152*4882a593Smuzhiyun const struct Qdisc_class_ops *cops = qops->cl_ops;
153*4882a593Smuzhiyun
154*4882a593Smuzhiyun if (!(cops->find && cops->walk && cops->leaf))
155*4882a593Smuzhiyun goto out_einval;
156*4882a593Smuzhiyun
157*4882a593Smuzhiyun if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
158*4882a593Smuzhiyun goto out_einval;
159*4882a593Smuzhiyun }
160*4882a593Smuzhiyun
161*4882a593Smuzhiyun qops->next = NULL;
162*4882a593Smuzhiyun *qp = qops;
163*4882a593Smuzhiyun rc = 0;
164*4882a593Smuzhiyun out:
165*4882a593Smuzhiyun write_unlock(&qdisc_mod_lock);
166*4882a593Smuzhiyun return rc;
167*4882a593Smuzhiyun
168*4882a593Smuzhiyun out_einval:
169*4882a593Smuzhiyun rc = -EINVAL;
170*4882a593Smuzhiyun goto out;
171*4882a593Smuzhiyun }
172*4882a593Smuzhiyun EXPORT_SYMBOL(register_qdisc);
173*4882a593Smuzhiyun
unregister_qdisc(struct Qdisc_ops * qops)174*4882a593Smuzhiyun int unregister_qdisc(struct Qdisc_ops *qops)
175*4882a593Smuzhiyun {
176*4882a593Smuzhiyun struct Qdisc_ops *q, **qp;
177*4882a593Smuzhiyun int err = -ENOENT;
178*4882a593Smuzhiyun
179*4882a593Smuzhiyun write_lock(&qdisc_mod_lock);
180*4882a593Smuzhiyun for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
181*4882a593Smuzhiyun if (q == qops)
182*4882a593Smuzhiyun break;
183*4882a593Smuzhiyun if (q) {
184*4882a593Smuzhiyun *qp = q->next;
185*4882a593Smuzhiyun q->next = NULL;
186*4882a593Smuzhiyun err = 0;
187*4882a593Smuzhiyun }
188*4882a593Smuzhiyun write_unlock(&qdisc_mod_lock);
189*4882a593Smuzhiyun return err;
190*4882a593Smuzhiyun }
191*4882a593Smuzhiyun EXPORT_SYMBOL(unregister_qdisc);
192*4882a593Smuzhiyun
193*4882a593Smuzhiyun /* Get default qdisc if not otherwise specified */
qdisc_get_default(char * name,size_t len)194*4882a593Smuzhiyun void qdisc_get_default(char *name, size_t len)
195*4882a593Smuzhiyun {
196*4882a593Smuzhiyun read_lock(&qdisc_mod_lock);
197*4882a593Smuzhiyun strlcpy(name, default_qdisc_ops->id, len);
198*4882a593Smuzhiyun read_unlock(&qdisc_mod_lock);
199*4882a593Smuzhiyun }
200*4882a593Smuzhiyun
qdisc_lookup_default(const char * name)201*4882a593Smuzhiyun static struct Qdisc_ops *qdisc_lookup_default(const char *name)
202*4882a593Smuzhiyun {
203*4882a593Smuzhiyun struct Qdisc_ops *q = NULL;
204*4882a593Smuzhiyun
205*4882a593Smuzhiyun for (q = qdisc_base; q; q = q->next) {
206*4882a593Smuzhiyun if (!strcmp(name, q->id)) {
207*4882a593Smuzhiyun if (!try_module_get(q->owner))
208*4882a593Smuzhiyun q = NULL;
209*4882a593Smuzhiyun break;
210*4882a593Smuzhiyun }
211*4882a593Smuzhiyun }
212*4882a593Smuzhiyun
213*4882a593Smuzhiyun return q;
214*4882a593Smuzhiyun }
215*4882a593Smuzhiyun
216*4882a593Smuzhiyun /* Set new default qdisc to use */
qdisc_set_default(const char * name)217*4882a593Smuzhiyun int qdisc_set_default(const char *name)
218*4882a593Smuzhiyun {
219*4882a593Smuzhiyun const struct Qdisc_ops *ops;
220*4882a593Smuzhiyun
221*4882a593Smuzhiyun if (!capable(CAP_NET_ADMIN))
222*4882a593Smuzhiyun return -EPERM;
223*4882a593Smuzhiyun
224*4882a593Smuzhiyun write_lock(&qdisc_mod_lock);
225*4882a593Smuzhiyun ops = qdisc_lookup_default(name);
226*4882a593Smuzhiyun if (!ops) {
227*4882a593Smuzhiyun /* Not found, drop lock and try to load module */
228*4882a593Smuzhiyun write_unlock(&qdisc_mod_lock);
229*4882a593Smuzhiyun request_module("sch_%s", name);
230*4882a593Smuzhiyun write_lock(&qdisc_mod_lock);
231*4882a593Smuzhiyun
232*4882a593Smuzhiyun ops = qdisc_lookup_default(name);
233*4882a593Smuzhiyun }
234*4882a593Smuzhiyun
235*4882a593Smuzhiyun if (ops) {
236*4882a593Smuzhiyun /* Set new default */
237*4882a593Smuzhiyun module_put(default_qdisc_ops->owner);
238*4882a593Smuzhiyun default_qdisc_ops = ops;
239*4882a593Smuzhiyun }
240*4882a593Smuzhiyun write_unlock(&qdisc_mod_lock);
241*4882a593Smuzhiyun
242*4882a593Smuzhiyun return ops ? 0 : -ENOENT;
243*4882a593Smuzhiyun }
244*4882a593Smuzhiyun
245*4882a593Smuzhiyun #ifdef CONFIG_NET_SCH_DEFAULT
246*4882a593Smuzhiyun /* Set default value from kernel config */
sch_default_qdisc(void)247*4882a593Smuzhiyun static int __init sch_default_qdisc(void)
248*4882a593Smuzhiyun {
249*4882a593Smuzhiyun return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
250*4882a593Smuzhiyun }
251*4882a593Smuzhiyun late_initcall(sch_default_qdisc);
252*4882a593Smuzhiyun #endif
253*4882a593Smuzhiyun
254*4882a593Smuzhiyun /* We know handle. Find qdisc among all qdisc's attached to device
255*4882a593Smuzhiyun * (root qdisc, all its children, children of children etc.)
256*4882a593Smuzhiyun * Note: caller either uses rtnl or rcu_read_lock()
257*4882a593Smuzhiyun */
258*4882a593Smuzhiyun
qdisc_match_from_root(struct Qdisc * root,u32 handle)259*4882a593Smuzhiyun static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
260*4882a593Smuzhiyun {
261*4882a593Smuzhiyun struct Qdisc *q;
262*4882a593Smuzhiyun
263*4882a593Smuzhiyun if (!qdisc_dev(root))
264*4882a593Smuzhiyun return (root->handle == handle ? root : NULL);
265*4882a593Smuzhiyun
266*4882a593Smuzhiyun if (!(root->flags & TCQ_F_BUILTIN) &&
267*4882a593Smuzhiyun root->handle == handle)
268*4882a593Smuzhiyun return root;
269*4882a593Smuzhiyun
270*4882a593Smuzhiyun hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
271*4882a593Smuzhiyun lockdep_rtnl_is_held()) {
272*4882a593Smuzhiyun if (q->handle == handle)
273*4882a593Smuzhiyun return q;
274*4882a593Smuzhiyun }
275*4882a593Smuzhiyun return NULL;
276*4882a593Smuzhiyun }
277*4882a593Smuzhiyun
/* Insert @q into its device's handle hash so qdisc_match_from_root()
 * can find it.  Root and ingress qdiscs are reachable directly and
 * are never hashed.  When @invisible is set the qdisc is hidden from
 * default dumps.
 */
void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if (q->parent == TC_H_ROOT || (q->flags & TCQ_F_INGRESS))
		return;

	ASSERT_RTNL();
	hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
	if (invisible)
		q->flags |= TCQ_F_INVISIBLE;
}
EXPORT_SYMBOL(qdisc_hash_add);
288*4882a593Smuzhiyun
qdisc_hash_del(struct Qdisc * q)289*4882a593Smuzhiyun void qdisc_hash_del(struct Qdisc *q)
290*4882a593Smuzhiyun {
291*4882a593Smuzhiyun if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
292*4882a593Smuzhiyun ASSERT_RTNL();
293*4882a593Smuzhiyun hash_del_rcu(&q->hash);
294*4882a593Smuzhiyun }
295*4882a593Smuzhiyun }
296*4882a593Smuzhiyun EXPORT_SYMBOL(qdisc_hash_del);
297*4882a593Smuzhiyun
/* Find a qdisc by @handle anywhere on @dev: first among the egress
 * hierarchy, then on the ingress queue.  Caller holds RTNL.
 * Returns NULL when no qdisc with that handle exists.
 */
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct netdev_queue *ingress;
	struct Qdisc *q;

	if (!handle)
		return NULL;

	q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
	if (q)
		return q;

	ingress = dev_ingress_queue(dev);
	if (!ingress)
		return NULL;

	return qdisc_match_from_root(ingress->qdisc_sleeping, handle);
}
315*4882a593Smuzhiyun
/* RCU-protected variant of qdisc_lookup(): same search order (egress
 * hierarchy, then ingress queue) but safe under rcu_read_lock()
 * instead of RTNL.
 */
struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
{
	struct netdev_queue *nq;
	struct Qdisc *q;

	if (!handle)
		return NULL;

	q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
	if (q)
		return q;

	nq = dev_ingress_queue_rcu(dev);
	if (!nq)
		return NULL;

	return qdisc_match_from_root(nq->qdisc_sleeping, handle);
}
333*4882a593Smuzhiyun
/* Return the leaf qdisc attached to class @classid of qdisc @p, or
 * NULL when @p is classless or the class does not exist.
 */
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;
	unsigned long cl;

	if (!cops)
		return NULL;

	cl = cops->find(p, classid);
	return cl ? cops->leaf(p, cl) : NULL;
}
347*4882a593Smuzhiyun
348*4882a593Smuzhiyun /* Find queueing discipline by name */
349*4882a593Smuzhiyun
qdisc_lookup_ops(struct nlattr * kind)350*4882a593Smuzhiyun static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
351*4882a593Smuzhiyun {
352*4882a593Smuzhiyun struct Qdisc_ops *q = NULL;
353*4882a593Smuzhiyun
354*4882a593Smuzhiyun if (kind) {
355*4882a593Smuzhiyun read_lock(&qdisc_mod_lock);
356*4882a593Smuzhiyun for (q = qdisc_base; q; q = q->next) {
357*4882a593Smuzhiyun if (nla_strcmp(kind, q->id) == 0) {
358*4882a593Smuzhiyun if (!try_module_get(q->owner))
359*4882a593Smuzhiyun q = NULL;
360*4882a593Smuzhiyun break;
361*4882a593Smuzhiyun }
362*4882a593Smuzhiyun }
363*4882a593Smuzhiyun read_unlock(&qdisc_mod_lock);
364*4882a593Smuzhiyun }
365*4882a593Smuzhiyun return q;
366*4882a593Smuzhiyun }
367*4882a593Smuzhiyun
368*4882a593Smuzhiyun /* The linklayer setting were not transferred from iproute2, in older
369*4882a593Smuzhiyun * versions, and the rate tables lookup systems have been dropped in
370*4882a593Smuzhiyun * the kernel. To keep backward compatible with older iproute2 tc
371*4882a593Smuzhiyun * utils, we detect the linklayer setting by detecting if the rate
372*4882a593Smuzhiyun * table were modified.
373*4882a593Smuzhiyun *
374*4882a593Smuzhiyun * For linklayer ATM table entries, the rate table will be aligned to
375*4882a593Smuzhiyun * 48 bytes, thus some table entries will contain the same value. The
376*4882a593Smuzhiyun * mpu (min packet unit) is also encoded into the old rate table, thus
377*4882a593Smuzhiyun * starting from the mpu, we find low and high table entries for
378*4882a593Smuzhiyun * mapping this cell. If these entries contain the same value, when
379*4882a593Smuzhiyun * the rate tables have been modified for linklayer ATM.
380*4882a593Smuzhiyun *
381*4882a593Smuzhiyun * This is done by rounding mpu to the nearest 48 bytes cell/entry,
382*4882a593Smuzhiyun * and then roundup to the next cell, calc the table entry one below,
383*4882a593Smuzhiyun * and compare.
384*4882a593Smuzhiyun */
/* Heuristically recover the linklayer type from an old-style rate
 * table (see the block comment above): returns TC_LINKLAYER_ATM when
 * the table was aligned to 48-byte ATM cells, TC_LINKLAYER_ETHERNET
 * otherwise.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	/* Round mpu up to its 48-byte cell boundary, then derive the
	 * table slots just at / just below the next cell boundary.
	 */
	int low = roundup(r->mpu, 48);
	int high = roundup(low+1, 48);
	int cell_low = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	/* Identical entries across the cell interval mean the table
	 * was quantized to 48-byte cells by iproute2, i.e. ATM.
	 */
	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
406*4882a593Smuzhiyun
407*4882a593Smuzhiyun static struct qdisc_rate_table *qdisc_rtab_list;
408*4882a593Smuzhiyun
qdisc_get_rtab(struct tc_ratespec * r,struct nlattr * tab,struct netlink_ext_ack * extack)409*4882a593Smuzhiyun struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
410*4882a593Smuzhiyun struct nlattr *tab,
411*4882a593Smuzhiyun struct netlink_ext_ack *extack)
412*4882a593Smuzhiyun {
413*4882a593Smuzhiyun struct qdisc_rate_table *rtab;
414*4882a593Smuzhiyun
415*4882a593Smuzhiyun if (tab == NULL || r->rate == 0 ||
416*4882a593Smuzhiyun r->cell_log == 0 || r->cell_log >= 32 ||
417*4882a593Smuzhiyun nla_len(tab) != TC_RTAB_SIZE) {
418*4882a593Smuzhiyun NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
419*4882a593Smuzhiyun return NULL;
420*4882a593Smuzhiyun }
421*4882a593Smuzhiyun
422*4882a593Smuzhiyun for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
423*4882a593Smuzhiyun if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
424*4882a593Smuzhiyun !memcmp(&rtab->data, nla_data(tab), 1024)) {
425*4882a593Smuzhiyun rtab->refcnt++;
426*4882a593Smuzhiyun return rtab;
427*4882a593Smuzhiyun }
428*4882a593Smuzhiyun }
429*4882a593Smuzhiyun
430*4882a593Smuzhiyun rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
431*4882a593Smuzhiyun if (rtab) {
432*4882a593Smuzhiyun rtab->rate = *r;
433*4882a593Smuzhiyun rtab->refcnt = 1;
434*4882a593Smuzhiyun memcpy(rtab->data, nla_data(tab), 1024);
435*4882a593Smuzhiyun if (r->linklayer == TC_LINKLAYER_UNAWARE)
436*4882a593Smuzhiyun r->linklayer = __detect_linklayer(r, rtab->data);
437*4882a593Smuzhiyun rtab->next = qdisc_rtab_list;
438*4882a593Smuzhiyun qdisc_rtab_list = rtab;
439*4882a593Smuzhiyun } else {
440*4882a593Smuzhiyun NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
441*4882a593Smuzhiyun }
442*4882a593Smuzhiyun return rtab;
443*4882a593Smuzhiyun }
444*4882a593Smuzhiyun EXPORT_SYMBOL(qdisc_get_rtab);
445*4882a593Smuzhiyun
qdisc_put_rtab(struct qdisc_rate_table * tab)446*4882a593Smuzhiyun void qdisc_put_rtab(struct qdisc_rate_table *tab)
447*4882a593Smuzhiyun {
448*4882a593Smuzhiyun struct qdisc_rate_table *rtab, **rtabp;
449*4882a593Smuzhiyun
450*4882a593Smuzhiyun if (!tab || --tab->refcnt)
451*4882a593Smuzhiyun return;
452*4882a593Smuzhiyun
453*4882a593Smuzhiyun for (rtabp = &qdisc_rtab_list;
454*4882a593Smuzhiyun (rtab = *rtabp) != NULL;
455*4882a593Smuzhiyun rtabp = &rtab->next) {
456*4882a593Smuzhiyun if (rtab == tab) {
457*4882a593Smuzhiyun *rtabp = rtab->next;
458*4882a593Smuzhiyun kfree(rtab);
459*4882a593Smuzhiyun return;
460*4882a593Smuzhiyun }
461*4882a593Smuzhiyun }
462*4882a593Smuzhiyun }
463*4882a593Smuzhiyun EXPORT_SYMBOL(qdisc_put_rtab);
464*4882a593Smuzhiyun
465*4882a593Smuzhiyun static LIST_HEAD(qdisc_stab_list);
466*4882a593Smuzhiyun
467*4882a593Smuzhiyun static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
468*4882a593Smuzhiyun [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
469*4882a593Smuzhiyun [TCA_STAB_DATA] = { .type = NLA_BINARY },
470*4882a593Smuzhiyun };
471*4882a593Smuzhiyun
/* Parse a TCA_STAB nest into a (possibly shared) size table.
 * Size tables map packet lengths to scheduler-visible lengths
 * (see __qdisc_calculate_pkt_len()).  Identical tables are shared
 * via a refcount.  Returns an ERR_PTR on parse/validation failure
 * or -ENOMEM.  Releases pair: qdisc_put_stab().
 */
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
					       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
					  extack);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE]) {
		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
		return ERR_PTR(-EINVAL);
	}

	s = nla_data(tb[TCA_STAB_BASE]);

	/* A non-zero tsize promises an explicit data table. */
	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA]) {
			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
			return ERR_PTR(-EINVAL);
		}
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	/* The data attribute length must agree with the advertised
	 * tsize (both zero, or both matching non-zero).
	 */
	if (tsize != s->tsize || (!tab && tsize > 0)) {
		NL_SET_ERR_MSG(extack, "Invalid size of size table");
		return ERR_PTR(-EINVAL);
	}

	/* Share an existing table with identical sizespec and data. */
	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		return stab;
	}

	/* Bound the shift amounts used by __qdisc_calculate_pkt_len(). */
	if (s->size_log > STAB_SIZE_LOG_MAX ||
	    s->cell_log > STAB_SIZE_LOG_MAX) {
		NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
		return ERR_PTR(-EINVAL);
	}

	/* Table data is a flexible array member after the header. */
	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}
535*4882a593Smuzhiyun
qdisc_put_stab(struct qdisc_size_table * tab)536*4882a593Smuzhiyun void qdisc_put_stab(struct qdisc_size_table *tab)
537*4882a593Smuzhiyun {
538*4882a593Smuzhiyun if (!tab)
539*4882a593Smuzhiyun return;
540*4882a593Smuzhiyun
541*4882a593Smuzhiyun if (--tab->refcnt == 0) {
542*4882a593Smuzhiyun list_del(&tab->list);
543*4882a593Smuzhiyun kfree_rcu(tab, rcu);
544*4882a593Smuzhiyun }
545*4882a593Smuzhiyun }
546*4882a593Smuzhiyun EXPORT_SYMBOL(qdisc_put_stab);
547*4882a593Smuzhiyun
qdisc_dump_stab(struct sk_buff * skb,struct qdisc_size_table * stab)548*4882a593Smuzhiyun static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
549*4882a593Smuzhiyun {
550*4882a593Smuzhiyun struct nlattr *nest;
551*4882a593Smuzhiyun
552*4882a593Smuzhiyun nest = nla_nest_start_noflag(skb, TCA_STAB);
553*4882a593Smuzhiyun if (nest == NULL)
554*4882a593Smuzhiyun goto nla_put_failure;
555*4882a593Smuzhiyun if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
556*4882a593Smuzhiyun goto nla_put_failure;
557*4882a593Smuzhiyun nla_nest_end(skb, nest);
558*4882a593Smuzhiyun
559*4882a593Smuzhiyun return skb->len;
560*4882a593Smuzhiyun
561*4882a593Smuzhiyun nla_put_failure:
562*4882a593Smuzhiyun return -1;
563*4882a593Smuzhiyun }
564*4882a593Smuzhiyun
/* Compute the scheduler-visible length of @skb using size table
 * @stab and store it in qdisc_skb_cb(skb)->pkt_len.  The table maps
 * (length + overhead), quantized by cell_log, to a value that is
 * scaled up by size_log; lengths beyond the table are extrapolated.
 */
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	/* No data table: only the overhead adjustment applies. */
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		/* Past the table end: extrapolate linearly from the
		 * last entry plus the wrapped-around remainder slot.
		 */
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	/* Schedulers assume a strictly positive packet length. */
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
593*4882a593Smuzhiyun
qdisc_warn_nonwc(const char * txt,struct Qdisc * qdisc)594*4882a593Smuzhiyun void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
595*4882a593Smuzhiyun {
596*4882a593Smuzhiyun if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
597*4882a593Smuzhiyun pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
598*4882a593Smuzhiyun txt, qdisc->ops->id, qdisc->handle >> 16);
599*4882a593Smuzhiyun qdisc->flags |= TCQ_F_WARN_NONWC;
600*4882a593Smuzhiyun }
601*4882a593Smuzhiyun }
602*4882a593Smuzhiyun EXPORT_SYMBOL(qdisc_warn_nonwc);
603*4882a593Smuzhiyun
/* hrtimer callback for a qdisc watchdog: kick the root qdisc so the
 * softirq path re-runs its dequeue.  One-shot; rescheduling happens
 * via qdisc_watchdog_schedule_range_ns().
 */
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}
615*4882a593Smuzhiyun
/* Initialize watchdog @wd for @qdisc, arming its hrtimer against the
 * given @clockid in absolute, CPU-pinned mode.
 */
void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
				 clockid_t clockid)
{
	hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
624*4882a593Smuzhiyun
/* Convenience wrapper: initialize @wd on the monotonic clock. */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
}
EXPORT_SYMBOL(qdisc_watchdog_init);
630*4882a593Smuzhiyun
/* (Re)arm watchdog @wd to fire at absolute time @expires, tolerating
 * up to @delta_ns of slack.  Does nothing while the root qdisc is
 * deactivated, and avoids reprogramming a timer that is already set
 * within the acceptable window.
 */
void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
				      u64 delta_ns)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	if (hrtimer_is_queued(&wd->timer)) {
		/* If timer is already set in [expires, expires + delta_ns],
		 * do not reprogram it.
		 * (Unsigned u64 subtraction: the condition holds exactly
		 * when expires <= last_expires <= expires + delta_ns.)
		 */
		if (wd->last_expires - expires <= delta_ns)
			return;
	}

	wd->last_expires = expires;
	hrtimer_start_range_ns(&wd->timer,
			       ns_to_ktime(expires),
			       delta_ns,
			       HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
653*4882a593Smuzhiyun
/* Cancel @wd's timer, waiting for a concurrently running callback to
 * finish (hrtimer_cancel semantics).
 */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
659*4882a593Smuzhiyun
qdisc_class_hash_alloc(unsigned int n)660*4882a593Smuzhiyun static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
661*4882a593Smuzhiyun {
662*4882a593Smuzhiyun struct hlist_head *h;
663*4882a593Smuzhiyun unsigned int i;
664*4882a593Smuzhiyun
665*4882a593Smuzhiyun h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
666*4882a593Smuzhiyun
667*4882a593Smuzhiyun if (h != NULL) {
668*4882a593Smuzhiyun for (i = 0; i < n; i++)
669*4882a593Smuzhiyun INIT_HLIST_HEAD(&h[i]);
670*4882a593Smuzhiyun }
671*4882a593Smuzhiyun return h;
672*4882a593Smuzhiyun }
673*4882a593Smuzhiyun
/* Double @clhash's bucket array when its load factor exceeds 0.75,
 * rehashing every class.  Allocation happens outside the tree lock;
 * the pointer/size/mask switch-over is done under sch_tree_lock() so
 * concurrent lookups always see a consistent table.  Best-effort:
 * silently keeps the old table if allocation fails.
 */
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
709*4882a593Smuzhiyun
qdisc_class_hash_init(struct Qdisc_class_hash * clhash)710*4882a593Smuzhiyun int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
711*4882a593Smuzhiyun {
712*4882a593Smuzhiyun unsigned int size = 4;
713*4882a593Smuzhiyun
714*4882a593Smuzhiyun clhash->hash = qdisc_class_hash_alloc(size);
715*4882a593Smuzhiyun if (!clhash->hash)
716*4882a593Smuzhiyun return -ENOMEM;
717*4882a593Smuzhiyun clhash->hashsize = size;
718*4882a593Smuzhiyun clhash->hashmask = size - 1;
719*4882a593Smuzhiyun clhash->hashelems = 0;
720*4882a593Smuzhiyun return 0;
721*4882a593Smuzhiyun }
722*4882a593Smuzhiyun EXPORT_SYMBOL(qdisc_class_hash_init);
723*4882a593Smuzhiyun
/* Release the bucket array allocated by qdisc_class_hash_init() or
 * resized by qdisc_class_hash_grow().  Does not touch the classes
 * themselves.
 */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
729*4882a593Smuzhiyun
/* Link @cl into the bucket selected by its classid and bump the element
 * count consulted by qdisc_class_hash_grow().
 * NOTE: no internal locking here; the caller must serialize updates.
 */
void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);
741*4882a593Smuzhiyun
/* Unlink @cl from its bucket and decrement the element count.
 * NOTE: no internal locking here; the caller must serialize updates.
 */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
749*4882a593Smuzhiyun
750*4882a593Smuzhiyun /* Allocate an unique handle from space managed by kernel
751*4882a593Smuzhiyun * Possible range is [8000-FFFF]:0000 (0x8000 values)
752*4882a593Smuzhiyun */
qdisc_alloc_handle(struct net_device * dev)753*4882a593Smuzhiyun static u32 qdisc_alloc_handle(struct net_device *dev)
754*4882a593Smuzhiyun {
755*4882a593Smuzhiyun int i = 0x8000;
756*4882a593Smuzhiyun static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
757*4882a593Smuzhiyun
758*4882a593Smuzhiyun do {
759*4882a593Smuzhiyun autohandle += TC_H_MAKE(0x10000U, 0);
760*4882a593Smuzhiyun if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
761*4882a593Smuzhiyun autohandle = TC_H_MAKE(0x80000000U, 0);
762*4882a593Smuzhiyun if (!qdisc_lookup(dev, autohandle))
763*4882a593Smuzhiyun return autohandle;
764*4882a593Smuzhiyun cond_resched();
765*4882a593Smuzhiyun } while (--i > 0);
766*4882a593Smuzhiyun
767*4882a593Smuzhiyun return 0;
768*4882a593Smuzhiyun }
769*4882a593Smuzhiyun
/* Propagate a decrease of @n packets and @len bytes from qdisc @sch up
 * through all of its ancestors, keeping their qlen/backlog counters in
 * sync (e.g. after packets were dropped from a child qdisc).  Negative
 * @n is allowed; only positive @n is accounted as drops.  The upward
 * walk runs under RCU and stops at ingress parents or qdiscs flagged
 * TCQ_F_NOPARENT.
 */
void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
	bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify parent qdisc only if child qdisc becomes empty.
		 *
		 * If child was empty even before update then backlog
		 * counter is screwed and we skip notification because
		 * parent class is already passive.
		 *
		 * If the original child was offloaded then it is allowed
		 * to be seem as empty, so the parent is notified anyway.
		 */
		notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
						       !qdisc_is_offloaded);
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			/* let the parent class deactivate the now-empty child */
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
818*4882a593Smuzhiyun
qdisc_offload_dump_helper(struct Qdisc * sch,enum tc_setup_type type,void * type_data)819*4882a593Smuzhiyun int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
820*4882a593Smuzhiyun void *type_data)
821*4882a593Smuzhiyun {
822*4882a593Smuzhiyun struct net_device *dev = qdisc_dev(sch);
823*4882a593Smuzhiyun int err;
824*4882a593Smuzhiyun
825*4882a593Smuzhiyun sch->flags &= ~TCQ_F_OFFLOADED;
826*4882a593Smuzhiyun if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
827*4882a593Smuzhiyun return 0;
828*4882a593Smuzhiyun
829*4882a593Smuzhiyun err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
830*4882a593Smuzhiyun if (err == -EOPNOTSUPP)
831*4882a593Smuzhiyun return 0;
832*4882a593Smuzhiyun
833*4882a593Smuzhiyun if (!err)
834*4882a593Smuzhiyun sch->flags |= TCQ_F_OFFLOADED;
835*4882a593Smuzhiyun
836*4882a593Smuzhiyun return err;
837*4882a593Smuzhiyun }
838*4882a593Smuzhiyun EXPORT_SYMBOL(qdisc_offload_dump_helper);
839*4882a593Smuzhiyun
qdisc_offload_graft_helper(struct net_device * dev,struct Qdisc * sch,struct Qdisc * new,struct Qdisc * old,enum tc_setup_type type,void * type_data,struct netlink_ext_ack * extack)840*4882a593Smuzhiyun void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
841*4882a593Smuzhiyun struct Qdisc *new, struct Qdisc *old,
842*4882a593Smuzhiyun enum tc_setup_type type, void *type_data,
843*4882a593Smuzhiyun struct netlink_ext_ack *extack)
844*4882a593Smuzhiyun {
845*4882a593Smuzhiyun bool any_qdisc_is_offloaded;
846*4882a593Smuzhiyun int err;
847*4882a593Smuzhiyun
848*4882a593Smuzhiyun if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
849*4882a593Smuzhiyun return;
850*4882a593Smuzhiyun
851*4882a593Smuzhiyun err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
852*4882a593Smuzhiyun
853*4882a593Smuzhiyun /* Don't report error if the graft is part of destroy operation. */
854*4882a593Smuzhiyun if (!err || !new || new == &noop_qdisc)
855*4882a593Smuzhiyun return;
856*4882a593Smuzhiyun
857*4882a593Smuzhiyun /* Don't report error if the parent, the old child and the new
858*4882a593Smuzhiyun * one are not offloaded.
859*4882a593Smuzhiyun */
860*4882a593Smuzhiyun any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
861*4882a593Smuzhiyun any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
862*4882a593Smuzhiyun any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
863*4882a593Smuzhiyun
864*4882a593Smuzhiyun if (any_qdisc_is_offloaded)
865*4882a593Smuzhiyun NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
866*4882a593Smuzhiyun }
867*4882a593Smuzhiyun EXPORT_SYMBOL(qdisc_offload_graft_helper);
868*4882a593Smuzhiyun
/* Offload a root qdisc graft: build a TC_ROOT_GRAFT request describing
 * the new root (@new may be NULL when detaching) and hand it to the
 * driver through qdisc_offload_graft_helper().  The designated
 * initializer zeroes any remaining fields of the offload struct.
 */
static void qdisc_offload_graft_root(struct net_device *dev,
				     struct Qdisc *new, struct Qdisc *old,
				     struct netlink_ext_ack *extack)
{
	struct tc_root_qopt_offload graft_offload = {
		.command	= TC_ROOT_GRAFT,
		.handle		= new ? new->handle : 0,
		.ingress	= (new && new->flags & TCQ_F_INGRESS) ||
				  (old && old->flags & TCQ_F_INGRESS),
	};

	qdisc_offload_graft_helper(dev, NULL, new, old,
				   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
}
883*4882a593Smuzhiyun
/* Fill @skb with one netlink message (type @event, e.g. RTM_NEWQDISC)
 * fully describing qdisc @q: tcmsg header, kind, shared block indices,
 * qdisc-private options, offload flag, size table and statistics.
 * Returns skb->len on success; on any failure the partially written
 * message is trimmed away and -1 is returned.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point for trim */
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	u32 block_index;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	/* Shared block indices are dumped only if the qdisc implements the
	 * getter and the index is actually set (non-zero).
	 */
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	/* per-CPU stats are aggregated by the gnet_stats_copy_* helpers */
	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
962*4882a593Smuzhiyun
/* Decide whether qdisc @q should be omitted from a dump: built-in
 * qdiscs are always hidden, invisible ones only when the dump did not
 * request them explicitly.
 */
static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	return (q->flags & TCQ_F_BUILTIN) ||
	       ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible);
}
972*4882a593Smuzhiyun
/* Send netlink notifications for a qdisc change: RTM_DELQDISC for @old
 * and/or RTM_NEWQDISC for @new (with NLM_F_REPLACE when both exist).
 * Qdiscs filtered out by tc_qdisc_dump_ignore() are skipped.  Returns
 * the rtnetlink_send() result, or -EINVAL if nothing was filled in
 * (both fills skipped or failed), or -ENOBUFS on allocation failure.
 */
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	/* an empty skb here means both fills were skipped: fall through
	 * to the error path and free it
	 */
	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}
1003*4882a593Smuzhiyun
/* Notify userspace about a qdisc replacement and then drop the
 * reference on the old qdisc.  Either @old or @new may be NULL; the
 * notification must happen before qdisc_put() so the dump still sees
 * a live qdisc.
 */
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new);

	if (old)
		qdisc_put(old);
}
1014*4882a593Smuzhiyun
qdisc_clear_nolock(struct Qdisc * sch)1015*4882a593Smuzhiyun static void qdisc_clear_nolock(struct Qdisc *sch)
1016*4882a593Smuzhiyun {
1017*4882a593Smuzhiyun sch->flags &= ~TCQ_F_NOLOCK;
1018*4882a593Smuzhiyun if (!(sch->flags & TCQ_F_CPUSTATS))
1019*4882a593Smuzhiyun return;
1020*4882a593Smuzhiyun
1021*4882a593Smuzhiyun free_percpu(sch->cpu_bstats);
1022*4882a593Smuzhiyun free_percpu(sch->cpu_qstats);
1023*4882a593Smuzhiyun sch->cpu_bstats = NULL;
1024*4882a593Smuzhiyun sch->cpu_qstats = NULL;
1025*4882a593Smuzhiyun sch->flags &= ~TCQ_F_CPUSTATS;
1026*4882a593Smuzhiyun }
1027*4882a593Smuzhiyun
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 *
 * Two cases: @parent == NULL grafts at the device root (all tx queues,
 * or the single ingress queue); otherwise the graft is delegated to
 * the parent's class ops.  Returns 0 or a negative errno.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		/* Root graft: either every tx queue, or the one ingress
		 * queue when old or new is an ingress qdisc.
		 */
		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev)) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}
		}

		/* quiesce the device while its root qdisc is swapped */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		/* qdiscs with ->attach (e.g. multi-queue roots) attach
		 * themselves to the per-queue pointers later on.
		 */
		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* one reference per additional tx queue sharing @new */
			if (new && i > 0)
				qdisc_refcount_inc(new);

			if (!ingress)
				qdisc_put(old);
		}

skip:
		if (!ingress) {
			old = rtnl_dereference(dev->qdisc);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);

			notify_and_destroy(net, skb, n, classid, old, new);

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
			qdisc_clear_nolock(new);

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new);
	}
	return 0;
}
1124*4882a593Smuzhiyun
/* Apply the optional TCA_INGRESS_BLOCK / TCA_EGRESS_BLOCK attributes to
 * @sch.  An index of 0 is rejected, and the qdisc must implement the
 * corresponding *_block_set() op.  Returns 0 or a negative errno with
 * an extack message set.
 */
static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
				   struct netlink_ext_ack *extack)
{
	u32 block_index;

	if (tca[TCA_INGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->ingress_block_set) {
			NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->ingress_block_set(sch, block_index);
	}
	if (tca[TCA_EGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->egress_block_set) {
			NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->egress_block_set(sch, block_index);
	}
	return 0;
}
1158*4882a593Smuzhiyun
/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.

   Returns the new qdisc on success.  On failure returns NULL and stores
   a negative errno in *errp; -EAGAIN means a module was loaded and the
   caller must replay the whole request (RTNL was dropped meanwhile).
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *p, u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (!ops) {
		NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
		goto err_out;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	/* ingress qdiscs always use the fixed TC_H_INGRESS handle;
	 * everything else gets the requested or an auto-allocated one
	 */
	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			if (handle == 0) {
				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
				err = -ENOSPC;
				goto err_out3;
			}
		}
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exist to keep backward compatible with a userspace
	 * loophole, what allowed userspace to get IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init), and then forgot to reinit tx_queue_len
	 * before again attaching a qdisc.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		goto err_out3;

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out5;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out4;
		}
		rcu_assign_pointer(sch->stab, stab);
	}
	if (tca[TCA_RATE]) {
		seqcount_t *running;

		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		/* pick the root's running seqcount unless this qdisc is
		 * ingress or sits under a multi-queue root
		 */
		if (sch->parent != TC_H_ROOT &&
		    !(sch->flags & TCQ_F_INGRESS) &&
		    (!p || !(p->flags & TCQ_F_MQROOT)))
			running = qdisc_root_sleeping_running(sch);
		else
			running = &sch->running;

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					running,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);
	trace_qdisc_create(ops, dev, parent);

	return sch;

err_out5:
	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
	if (ops->destroy)
		ops->destroy(sch);
err_out3:
	dev_put(dev);
	qdisc_free(sch);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}
1323*4882a593Smuzhiyun
/* Change an existing qdisc in place: apply new TCA_OPTIONS via the
 * qdisc's ->change op, swap in a new size table, and/or replace the
 * rate estimator.  Changing shared blocks is explicitly unsupported.
 * Estimator replacement errors are deliberately ignored because the
 * earlier changes cannot be undone at that point.
 */
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
			struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (!sch->ops->change) {
			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
			return -EINVAL;
		}
		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
			return -EOPNOTSUPP;
		}
		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
		if (err)
			return err;
	}

	/* with no TCA_STAB attribute this clears any existing size table */
	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}
1369*4882a593Smuzhiyun
/* State for the loop-detection walk done by check_loop():
 * @w must be first so check_loop_fn() can cast the qdisc_walker back,
 * @p is the prospective parent qdisc we must not reach again,
 * @depth is the current recursion depth (bounded in check_loop_fn()).
 */
struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc *p;
	int depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);
1378*4882a593Smuzhiyun
check_loop(struct Qdisc * q,struct Qdisc * p,int depth)1379*4882a593Smuzhiyun static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1380*4882a593Smuzhiyun {
1381*4882a593Smuzhiyun struct check_loop_arg arg;
1382*4882a593Smuzhiyun
1383*4882a593Smuzhiyun if (q->ops->cl_ops == NULL)
1384*4882a593Smuzhiyun return 0;
1385*4882a593Smuzhiyun
1386*4882a593Smuzhiyun arg.w.stop = arg.w.skip = arg.w.count = 0;
1387*4882a593Smuzhiyun arg.w.fn = check_loop_fn;
1388*4882a593Smuzhiyun arg.depth = depth;
1389*4882a593Smuzhiyun arg.p = p;
1390*4882a593Smuzhiyun q->ops->cl_ops->walk(q, &arg.w);
1391*4882a593Smuzhiyun return arg.w.stop ? -ELOOP : 0;
1392*4882a593Smuzhiyun }
1393*4882a593Smuzhiyun
1394*4882a593Smuzhiyun static int
check_loop_fn(struct Qdisc * q,unsigned long cl,struct qdisc_walker * w)1395*4882a593Smuzhiyun check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1396*4882a593Smuzhiyun {
1397*4882a593Smuzhiyun struct Qdisc *leaf;
1398*4882a593Smuzhiyun const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1399*4882a593Smuzhiyun struct check_loop_arg *arg = (struct check_loop_arg *)w;
1400*4882a593Smuzhiyun
1401*4882a593Smuzhiyun leaf = cops->leaf(q, cl);
1402*4882a593Smuzhiyun if (leaf) {
1403*4882a593Smuzhiyun if (leaf == arg->p || arg->depth > 7)
1404*4882a593Smuzhiyun return -ELOOP;
1405*4882a593Smuzhiyun return check_loop(leaf, arg->p, arg->depth + 1);
1406*4882a593Smuzhiyun }
1407*4882a593Smuzhiyun return 0;
1408*4882a593Smuzhiyun }
1409*4882a593Smuzhiyun
/* Netlink attribute policy shared by the qdisc and traffic-class
 * message handlers below.
 */
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_STRING },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
};
1420*4882a593Smuzhiyun
1421*4882a593Smuzhiyun /*
1422*4882a593Smuzhiyun * Delete/get qdisc.
1423*4882a593Smuzhiyun */
1424*4882a593Smuzhiyun
/* RTM_DELQDISC / RTM_GETQDISC handler.
 *
 * Locates the qdisc either through its parent (tcm_parent) or directly
 * by handle, cross-checks an optionally supplied handle and kind, and
 * then either grafts NULL in its place (delete) or just notifies
 * userspace about it (get).
 */
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	/* Deleting requires admin capability; a plain get does not. */
	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				/* Resolve the parent qdisc, then the leaf
				 * attached to class 'clid' below it.
				 */
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}

		/* When both parent and handle are given they must agree. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		/* Graft NULL in place of the old qdisc (deletion). */
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
1505*4882a593Smuzhiyun
1506*4882a593Smuzhiyun /*
1507*4882a593Smuzhiyun * Create/change qdisc.
1508*4882a593Smuzhiyun */
1509*4882a593Smuzhiyun
/* RTM_NEWQDISC handler: create a new qdisc or change an existing one,
 * honouring the NLM_F_CREATE / NLM_F_REPLACE / NLM_F_EXCL flag
 * combinations described in the "magic test" comment below.
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
					return -EEXIST;
				}
				if (TC_H_MIN(tcm->tcm_handle)) {
					NL_SET_ERR_MSG(extack, "Invalid minor handle");
					return -EINVAL;
				}
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL) {
					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
					return -EEXIST;
				}
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					NL_SET_ERR_MSG(extack, "Invalid qdisc name");
					return -EINVAL;
				}
				/* Grafting q under p must not create a cycle. */
				if (q == p ||
				    (p && check_loop(q, p, 0))) {
					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
					return -ELOOP;
				}
				/* Hold a reference for the graft below. */
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know, that some child q is already
				 * attached to this parent and have choice:
				 * either to change it or to create/graft new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, requestor wanted to say,
				 * that qdisc tcm_handle is not expected
				 * to exist, so that we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is sort of hole in API, we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft, if
				 * user gave KIND, which does not match existing.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
			return -EINVAL;
		}
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		return -ENOENT;
	}
	if (n->nlmsg_flags & NLM_F_EXCL) {
		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
		return -EEXIST;
	}
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}
	err = qdisc_change(q, tca, extack);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
		return -ENOENT;
	}
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev)) {
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err, extack);
		} else {
			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
			err = -ENOENT;
		}
	} else {
		struct netdev_queue *dev_queue;

		/* Let a classful parent pick the tx queue if it can. */
		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err, extack);
	}
	if (q == NULL) {
		/* -EAGAIN asks us to re-run the whole request, presumably
		 * after qdisc_create() triggered a module load — confirm
		 * against qdisc_create().
		 */
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
	if (err) {
		if (q)
			qdisc_put(q);
		return err;
	}

	return 0;
}
1690*4882a593Smuzhiyun
/* Dump @root and, if @recur, every qdisc hashed on its device, skipping
 * the first @s_q_idx entries so an interrupted dump can resume.
 *
 * Returns 0 on success or -1 when the skb filled up; *q_idx_p is
 * updated either way so the next dump call continues where this one
 * stopped.
 */
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	/* The root itself occupies index slot 0 of this device's dump. */
	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the global
	 * qdisc hashtable, we don't want to hit it again
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	/* skb full: report failure but still record how far we got. */
	ret = -1;
	goto out;
}
1744*4882a593Smuzhiyun
/* RTM_GETQDISC dump handler: walk every device in the namespace and
 * dump its root hierarchy plus its ingress qdisc.  Resume state lives
 * in cb->args[0] (device index) and cb->args[1] (qdisc index within
 * the device).
 */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tca[TCA_MAX + 1];
	int err;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();

	/* Parse only to pick up the TCA_DUMP_INVISIBLE flag. */
	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
				     rtm_tca_policy, cb->extack);
	if (err < 0)
		return err;

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		/* Only the device we resumed on keeps its saved q index. */
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
				       skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
1797*4882a593Smuzhiyun
1798*4882a593Smuzhiyun
1799*4882a593Smuzhiyun
1800*4882a593Smuzhiyun /************************************************
1801*4882a593Smuzhiyun * Traffic classes manipulation. *
1802*4882a593Smuzhiyun ************************************************/
1803*4882a593Smuzhiyun
/* Fill one RTM_*TCLASS message for class @cl of qdisc @q into @skb,
 * including the class's own dump and its statistics.
 *
 * Returns the new skb length on success or -1 on failure, in which
 * case everything written so far is trimmed off again.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	/* Let the qdisc fill in class-specific attributes and handles. */
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	/* Remove the partial message so the skb stays consistent. */
	nlmsg_trim(skb, b);
	return -1;
}
1849*4882a593Smuzhiyun
tclass_notify(struct net * net,struct sk_buff * oskb,struct nlmsghdr * n,struct Qdisc * q,unsigned long cl,int event)1850*4882a593Smuzhiyun static int tclass_notify(struct net *net, struct sk_buff *oskb,
1851*4882a593Smuzhiyun struct nlmsghdr *n, struct Qdisc *q,
1852*4882a593Smuzhiyun unsigned long cl, int event)
1853*4882a593Smuzhiyun {
1854*4882a593Smuzhiyun struct sk_buff *skb;
1855*4882a593Smuzhiyun u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1856*4882a593Smuzhiyun int err = 0;
1857*4882a593Smuzhiyun
1858*4882a593Smuzhiyun skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1859*4882a593Smuzhiyun if (!skb)
1860*4882a593Smuzhiyun return -ENOBUFS;
1861*4882a593Smuzhiyun
1862*4882a593Smuzhiyun if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1863*4882a593Smuzhiyun kfree_skb(skb);
1864*4882a593Smuzhiyun return -EINVAL;
1865*4882a593Smuzhiyun }
1866*4882a593Smuzhiyun
1867*4882a593Smuzhiyun err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1868*4882a593Smuzhiyun n->nlmsg_flags & NLM_F_ECHO);
1869*4882a593Smuzhiyun if (err > 0)
1870*4882a593Smuzhiyun err = 0;
1871*4882a593Smuzhiyun return err;
1872*4882a593Smuzhiyun }
1873*4882a593Smuzhiyun
tclass_del_notify(struct net * net,const struct Qdisc_class_ops * cops,struct sk_buff * oskb,struct nlmsghdr * n,struct Qdisc * q,unsigned long cl)1874*4882a593Smuzhiyun static int tclass_del_notify(struct net *net,
1875*4882a593Smuzhiyun const struct Qdisc_class_ops *cops,
1876*4882a593Smuzhiyun struct sk_buff *oskb, struct nlmsghdr *n,
1877*4882a593Smuzhiyun struct Qdisc *q, unsigned long cl)
1878*4882a593Smuzhiyun {
1879*4882a593Smuzhiyun u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1880*4882a593Smuzhiyun struct sk_buff *skb;
1881*4882a593Smuzhiyun int err = 0;
1882*4882a593Smuzhiyun
1883*4882a593Smuzhiyun if (!cops->delete)
1884*4882a593Smuzhiyun return -EOPNOTSUPP;
1885*4882a593Smuzhiyun
1886*4882a593Smuzhiyun skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1887*4882a593Smuzhiyun if (!skb)
1888*4882a593Smuzhiyun return -ENOBUFS;
1889*4882a593Smuzhiyun
1890*4882a593Smuzhiyun if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1891*4882a593Smuzhiyun RTM_DELTCLASS) < 0) {
1892*4882a593Smuzhiyun kfree_skb(skb);
1893*4882a593Smuzhiyun return -EINVAL;
1894*4882a593Smuzhiyun }
1895*4882a593Smuzhiyun
1896*4882a593Smuzhiyun err = cops->delete(q, cl);
1897*4882a593Smuzhiyun if (err) {
1898*4882a593Smuzhiyun kfree_skb(skb);
1899*4882a593Smuzhiyun return err;
1900*4882a593Smuzhiyun }
1901*4882a593Smuzhiyun
1902*4882a593Smuzhiyun err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1903*4882a593Smuzhiyun n->nlmsg_flags & NLM_F_ECHO);
1904*4882a593Smuzhiyun if (err > 0)
1905*4882a593Smuzhiyun err = 0;
1906*4882a593Smuzhiyun return err;
1907*4882a593Smuzhiyun }
1908*4882a593Smuzhiyun
1909*4882a593Smuzhiyun #ifdef CONFIG_NET_CLS
1910*4882a593Smuzhiyun
/* Arguments for the per-filter walk in tcf_node_bind():
 * @w must be first (cast back from the tcf_walker pointer),
 * @base and @cl are passed through to the classifier's bind_class op,
 * @classid is the class id whose bindings are being updated.
 */
struct tcf_bind_args {
	struct tcf_walker w;
	unsigned long base;
	unsigned long cl;
	u32 classid;
};
1917*4882a593Smuzhiyun
tcf_node_bind(struct tcf_proto * tp,void * n,struct tcf_walker * arg)1918*4882a593Smuzhiyun static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1919*4882a593Smuzhiyun {
1920*4882a593Smuzhiyun struct tcf_bind_args *a = (void *)arg;
1921*4882a593Smuzhiyun
1922*4882a593Smuzhiyun if (tp->ops->bind_class) {
1923*4882a593Smuzhiyun struct Qdisc *q = tcf_block_q(tp->chain->block);
1924*4882a593Smuzhiyun
1925*4882a593Smuzhiyun sch_tree_lock(q);
1926*4882a593Smuzhiyun tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
1927*4882a593Smuzhiyun sch_tree_unlock(q);
1928*4882a593Smuzhiyun }
1929*4882a593Smuzhiyun return 0;
1930*4882a593Smuzhiyun }
1931*4882a593Smuzhiyun
/* Arguments for the per-class walk in tc_bind_class_walker():
 * @w must be first (cast back from the qdisc_walker pointer),
 * @new_cl is the internal class to re-bind matching filters to,
 * @portid/@clid come from the originating netlink request.
 */
struct tc_bind_class_args {
	struct qdisc_walker w;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
};
1938*4882a593Smuzhiyun
/* qdisc_walker callback, invoked once per class of @q: iterate every
 * filter chain and classifier attached to class @cl and re-bind filter
 * results that reference class a->clid to the internal class a->new_cl
 * (via tcf_node_bind() above).
 */
static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
				struct qdisc_walker *w)
{
	struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;

	/* Classes without a filter block have nothing to re-bind. */
	block = cops->tcf_block(q, cl, NULL);
	if (!block)
		return 0;
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		struct tcf_proto *tp;

		for (tp = tcf_get_next_proto(chain, NULL, true);
		     tp; tp = tcf_get_next_proto(chain, tp, true)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = a->clid;
			arg.base = cl;
			arg.cl = a->new_cl;
			tp->ops->walk(tp, &arg.w, true);
		}
	}

	return 0;
}
1969*4882a593Smuzhiyun
/* Walk all classes of @q and re-bind filters that point at class @clid
 * to @new_cl.  No-op for qdiscs whose classes carry no filter blocks.
 */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tc_bind_class_args args = {
		.portid = portid,
		.clid = clid,
		.new_cl = new_cl,
	};

	if (!cops->tcf_block)
		return;

	args.w.fn = tc_bind_class_walker;
	cops->walk(q, &args.w);
}
1984*4882a593Smuzhiyun
1985*4882a593Smuzhiyun #else
1986*4882a593Smuzhiyun
/* Without CONFIG_NET_CLS there are no classifiers to re-bind. */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}
1991*4882a593Smuzhiyun
1992*4882a593Smuzhiyun #endif
1993*4882a593Smuzhiyun
/* RTM_NEWTCLASS / RTM_DELTCLASS / RTM_GETTCLASS handler: create/change,
 * delete or query a traffic class of a classful qdisc.
 *
 * Resolves the owning qdisc handle from tcm_parent/tcm_handle (see the
 * table below), locates the qdisc and the class, then dispatches on the
 * netlink message type.  Returns 0 on success or a negative errno.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	/* Only RTM_GETTCLASS is allowed without CAP_NET_ADMIN. */
	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = rtnl_dereference(dev->qdisc)->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = rtnl_dereference(dev->qdisc)->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		/* Nonexistent class: only NEW with NLM_F_CREATE may proceed. */
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl);
			/* Unbind filters from the class by rebinding to class 0 */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
		return -EOPNOTSUPP;
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl, extack);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
		/* We just create a new class, need to do reverse binding. */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}
2130*4882a593Smuzhiyun
/* Closure handed to a qdisc's class walker while dumping classes. */
struct qdisc_dump_args {
	struct qdisc_walker w;		/* must stay first: the walk callback
					 * casts it back to qdisc_dump_args */
	struct sk_buff *skb;		/* dump skb being filled */
	struct netlink_callback *cb;	/* netlink dump state (portid, seq) */
};
2136*4882a593Smuzhiyun
qdisc_class_dump(struct Qdisc * q,unsigned long cl,struct qdisc_walker * arg)2137*4882a593Smuzhiyun static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2138*4882a593Smuzhiyun struct qdisc_walker *arg)
2139*4882a593Smuzhiyun {
2140*4882a593Smuzhiyun struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2141*4882a593Smuzhiyun
2142*4882a593Smuzhiyun return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2143*4882a593Smuzhiyun a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2144*4882a593Smuzhiyun RTM_NEWTCLASS);
2145*4882a593Smuzhiyun }
2146*4882a593Smuzhiyun
/* Dump the classes of one qdisc into @skb, honouring resume state:
 * *t_p counts qdiscs visited so far, s_t is the qdisc index to resume
 * from, and cb->args[1] carries the per-qdisc class skip count.
 * Returns 0 to continue the dump, -1 when the skb filled up.
 */
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	/* Skip qdiscs that are invisible to dumps, already covered by a
	 * previous invocation, classless, or excluded by an explicit
	 * tcm_parent major filter.
	 */
	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	/* First qdisc past the resume point: clear stale per-qdisc state. */
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];	/* classes already dumped in this qdisc */
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;	/* progress for the next dump call */
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
2175*4882a593Smuzhiyun
/* Dump classes for @root and, when @recur is true, for every qdisc in
 * the root device's qdisc hash (callers pass recur == false for the
 * ingress root).  Returns 0, or -1 when the skb filled up.
 */
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t, bool recur)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root) || !recur)
		return 0;

	/* A specific parent major was requested: dump only that qdisc. */
	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && q != root &&
		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	/* Otherwise walk every hashed qdisc on the device. */
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}
2206*4882a593Smuzhiyun
/* RTM_GETTCLASS dump handler: walk the classes of all qdiscs on the
 * device named by tcm_ifindex — the egress hierarchy first, then the
 * ingress root.  cb->args[0] holds the qdisc resume index between
 * invocations.  Returns skb->len (netlink dump convention).
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
				skb, tcm, cb, &t, s_t, true) < 0)
		goto done;

	/* The ingress root, if any, is dumped without recursion. */
	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t, false) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);	/* balances dev_get_by_index() above */
	return skb->len;
}
2240*4882a593Smuzhiyun
2241*4882a593Smuzhiyun #ifdef CONFIG_PROC_FS
/* Seq handler for /proc/net/psched: prints four hex words — the ns per
 * microsecond, the length of one psched tick in ns, a constant 1000000,
 * and the hrtimer resolution expressed in Hz.  NOTE(review): userspace
 * tc presumably derives its clock scaling from these; the consumer-side
 * semantics are not visible here — confirm before changing the format.
 */
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}
2251*4882a593Smuzhiyun
psched_net_init(struct net * net)2252*4882a593Smuzhiyun static int __net_init psched_net_init(struct net *net)
2253*4882a593Smuzhiyun {
2254*4882a593Smuzhiyun struct proc_dir_entry *e;
2255*4882a593Smuzhiyun
2256*4882a593Smuzhiyun e = proc_create_single("psched", 0, net->proc_net, psched_show);
2257*4882a593Smuzhiyun if (e == NULL)
2258*4882a593Smuzhiyun return -ENOMEM;
2259*4882a593Smuzhiyun
2260*4882a593Smuzhiyun return 0;
2261*4882a593Smuzhiyun }
2262*4882a593Smuzhiyun
/* Remove the per-namespace /proc/net/psched entry. */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
2267*4882a593Smuzhiyun #else
/* !CONFIG_PROC_FS stub: no proc entry to create. */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}
2272*4882a593Smuzhiyun
/* !CONFIG_PROC_FS stub: no proc entry to remove. */
static void __net_exit psched_net_exit(struct net *net)
{
}
2276*4882a593Smuzhiyun #endif
2277*4882a593Smuzhiyun
/* Per-network-namespace setup/teardown of the psched proc entry. */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
2282*4882a593Smuzhiyun
pktsched_init(void)2283*4882a593Smuzhiyun static int __init pktsched_init(void)
2284*4882a593Smuzhiyun {
2285*4882a593Smuzhiyun int err;
2286*4882a593Smuzhiyun
2287*4882a593Smuzhiyun err = register_pernet_subsys(&psched_net_ops);
2288*4882a593Smuzhiyun if (err) {
2289*4882a593Smuzhiyun pr_err("pktsched_init: "
2290*4882a593Smuzhiyun "cannot initialize per netns operations\n");
2291*4882a593Smuzhiyun return err;
2292*4882a593Smuzhiyun }
2293*4882a593Smuzhiyun
2294*4882a593Smuzhiyun register_qdisc(&pfifo_fast_ops);
2295*4882a593Smuzhiyun register_qdisc(&pfifo_qdisc_ops);
2296*4882a593Smuzhiyun register_qdisc(&bfifo_qdisc_ops);
2297*4882a593Smuzhiyun register_qdisc(&pfifo_head_drop_qdisc_ops);
2298*4882a593Smuzhiyun register_qdisc(&mq_qdisc_ops);
2299*4882a593Smuzhiyun register_qdisc(&noqueue_qdisc_ops);
2300*4882a593Smuzhiyun
2301*4882a593Smuzhiyun rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2302*4882a593Smuzhiyun rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2303*4882a593Smuzhiyun rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2304*4882a593Smuzhiyun 0);
2305*4882a593Smuzhiyun rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2306*4882a593Smuzhiyun rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2307*4882a593Smuzhiyun rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2308*4882a593Smuzhiyun 0);
2309*4882a593Smuzhiyun
2310*4882a593Smuzhiyun return 0;
2311*4882a593Smuzhiyun }
2312*4882a593Smuzhiyun
/* Core packet-scheduler infrastructure: initialize at subsystem level,
 * ahead of device-level initcalls that may depend on it.
 */
subsys_initcall(pktsched_init);
2314