// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/slab.h>
#include <linux/hashtable.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

#include <trace/events/qdisc.h>

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box
   that is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in an order and at times
   determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles into a
   form more intelligible to the kernel, to perform some sanity
   checks and the part of the work that is common to all qdiscs,
   and to provide rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it only means that the
   discipline does not want to send anything at this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not
   the real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code:
   NET_XMIT_DROP 	- this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore

   Auxiliary routines:

   ---peek

   like dequeue, but without removing the packet from the queue

   ---reset

   returns the qdisc to its initial state: purges all buffers, clears all
   timers, counters (except statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys the resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
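
/* A condensed sketch of how the core transmit path (cf. __dev_xmit_skb())
 * consumes the enqueue return codes described above, assuming the
 * three-argument ->enqueue() signature of this kernel generation:
 *
 *	struct sk_buff *to_free = NULL;
 *	int ret = q->enqueue(skb, q, &to_free);
 *
 *	kfree_skb_list(to_free);		// skbs the qdisc asked us to drop
 *	if (ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)
 *		return 0;			// queued, or congestion: back off/ignore
 *	// NET_XMIT_DROP: this skb was dropped; do not back off,
 *	// wait until the queue clears before retrying.
 *	return ret;
 */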

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		if (!(cops->find && cops->walk && cops->leaf))
			goto out_einval;

		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);
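
/* The typical registration pattern for a qdisc module, sketched with
 * placeholder names (my_qdisc_ops, my_enqueue, my_dequeue); compare the
 * in-tree schedulers for real instances:
 *
 *	static struct Qdisc_ops my_qdisc_ops __read_mostly = {
 *		.id		= "myqdisc",
 *		.enqueue	= my_enqueue,
 *		.dequeue	= my_dequeue,
 *		.peek		= qdisc_peek_dequeued,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init my_module_init(void)
 *	{
 *		return register_qdisc(&my_qdisc_ops);
 *	}
 *
 *	static void __exit my_module_exit(void)
 *	{
 *		unregister_qdisc(&my_qdisc_ops);
 *	}
 *
 * Note that register_qdisc() above rejects (-EINVAL) a NULL ->peek when
 * ->dequeue is set, so a real dequeue needs a real peek such as
 * qdisc_peek_dequeued().
 */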

int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);

/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
{
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!try_module_get(q->owner))
				q = NULL;
			break;
		}
	}

	return q;
}

/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}
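
/* In mainline kernels this is reached from the net.core.default_qdisc
 * sysctl handler, so the scheduler attached to newly activated devices
 * can be changed at runtime, e.g.:
 *
 *	# sysctl -w net.core.default_qdisc=fq_codel
 *
 * If the named scheduler is not built in, the request_module() call
 * above tries to load the sch_<name> module first.
 */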

#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif

/* We know the handle. Find the qdisc among all qdiscs attached to the
 * device (the root qdisc, all its children, children of children, etc.)
 * Note: the caller either holds rtnl or rcu_read_lock()
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
				   lockdep_rtnl_is_held()) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;
	}
}
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
out:
	return q;
}

struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
{
	struct netdev_queue *nq;
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
	if (q)
		goto out;

	nq = dev_ingress_queue_rcu(dev);
	if (nq)
		q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
out:
	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->find(p, classid);

	if (cl == 0)
		return NULL;
	return cops->leaf(p, cl);
}
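
/* For reference: a handle is a 32-bit value split into a 16-bit major
 * and a 16-bit minor number, written "major:minor" by tc(8). A qdisc
 * itself always has minor 0; its classes reuse the major number with a
 * non-zero minor, e.g.:
 *
 *	handle  0x00010000	"1:"	(qdisc 1:)
 *	classid 0x00010001	"1:1"	(class 1 of qdisc 1:)
 *
 *	TC_H_MAJ(0x00010001) == 0x00010000
 *	TC_H_MIN(0x00010001) == 0x00000001
 */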

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

/* The linklayer setting was not transferred from iproute2, in older
 * versions, and the rate table lookup system has been dropped from
 * the kernel. To stay backward compatible with older iproute2 tc
 * utils, we detect the linklayer setting by checking whether the rate
 * table was modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, thus some table entries will contain the same value.  The
 * mpu (min packet unit) is also encoded into the old rate table, thus
 * starting from the mpu, we find the low and high table entries for
 * mapping this cell.  If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 *
 * This is done by rounding mpu to the nearest 48-byte cell/entry,
 * then rounding up to the next cell, calculating the table entry one
 * below, and comparing.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
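
/* A worked example with assumed parameters, mpu = 0 and cell_log = 3
 * (one rate-table slot per 8 bytes):
 *
 *	low       = roundup(0, 48)  = 0
 *	high      = roundup(1, 48)  = 48
 *	cell_low  = 0 >> 3          = 0
 *	cell_high = (48 >> 3) - 1   = 5
 *
 * Slots 0..5 all fall within the first 48-byte ATM cell, so a table
 * built for ATM has rtab[0] == rtab[5] and TC_LINKLAYER_ATM is
 * returned, while a plain Ethernet table normally differs somewhere
 * in that range.
 */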

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab,
					struct netlink_ext_ack *extack)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 ||
	    r->cell_log == 0 || r->cell_log >= 32 ||
	    nla_len(tab) != TC_RTAB_SIZE) {
		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
		return NULL;
	}

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	} else {
		NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);

static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
					       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
					  extack);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE]) {
		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
		return ERR_PTR(-EINVAL);
	}

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA]) {
			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
			return ERR_PTR(-EINVAL);
		}
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0)) {
		NL_SET_ERR_MSG(extack, "Invalid size of size table");
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		return stab;
	}

	if (s->size_log > STAB_SIZE_LOG_MAX ||
	    s->cell_log > STAB_SIZE_LOG_MAX) {
		NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
		return ERR_PTR(-EINVAL);
	}

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		kfree_rcu(tab, rcu);
	}
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
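
/* A worked example with an assumed size table: overhead = 24,
 * cell_align = 0, cell_log = 6 (64-byte cells), size_log = 6 and
 * tsize = 512. For a 1400-byte skb:
 *
 *	pkt_len = 1400 + 24 = 1424
 *	slot    = 1424 >> 6 = 22	(22 < 512, direct lookup)
 *	pkt_len = stab->data[22] << 6
 *
 * i.e. the table maps the adjusted length to a cell count, which is
 * scaled back to bytes; slots beyond the table are extrapolated from
 * the last entry as shown in the code above.
 */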

void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
				 clockid_t clockid)
{
	hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
				      u64 delta_ns)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	if (hrtimer_is_queued(&wd->timer)) {
		/* If timer is already set in [expires, expires + delta_ns],
		 * do not reprogram it.
		 */
		if (wd->last_expires - expires <= delta_ns)
			return;
	}

	wd->last_expires = expires;
	hrtimer_start_range_ns(&wd->timer,
			       ns_to_ktime(expires),
			       delta_ns,
			       HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
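
/* The usual watchdog pattern in a shaping qdisc, sketched with a
 * hypothetical private struct; the timer reschedules the device so
 * ->dequeue() runs again once the next packet becomes due:
 *
 *	// in ->init():
 *	qdisc_watchdog_init(&q->watchdog, sch);
 *
 *	// in ->dequeue(), when the head packet may not be sent yet
 *	// (qdisc_watchdog_schedule_ns() is a thin wrapper around the
 *	// range variant above):
 *	qdisc_watchdog_schedule_ns(&q->watchdog, next_send_time_ns);
 *	return NULL;	// "nothing now", which is not "queue empty"
 *
 *	// in ->reset() and ->destroy():
 *	qdisc_watchdog_cancel(&q->watchdog);
 */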

static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	struct hlist_head *h;
	unsigned int i;

	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (!clhash->hash)
		return -ENOMEM;
	clhash->hashsize  = size;
	clhash->hashmask  = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
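
/* How a classful qdisc is expected to combine the helpers above,
 * sketched with hypothetical q and cl variables:
 *
 *	// in ->init():
 *	err = qdisc_class_hash_init(&q->clhash);
 *
 *	// when creating a class:
 *	cl->common.classid = classid;
 *	qdisc_class_hash_insert(&q->clhash, &cl->common);
 *	qdisc_class_hash_grow(sch, &q->clhash);
 *
 *	// when deleting a class:
 *	qdisc_class_hash_remove(&q->clhash, &cl->common);
 *
 *	// in ->destroy():
 *	qdisc_class_hash_destroy(&q->clhash);
 */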

/* Allocate a unique handle from the space managed by the kernel
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while (--i > 0);

	return 0;
}

void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
	bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify the parent qdisc only if the child qdisc becomes
		 * empty.
		 *
		 * If the child was empty even before the update, then the
		 * backlog counter is screwed and we skip the notification
		 * because the parent class is already passive.
		 *
		 * If the original child was offloaded, then it is allowed
		 * to be seen as empty, so the parent is notified anyway.
		 */
		notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
						       !qdisc_is_offloaded);
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
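
/* A sketch of the usual caller pattern: a qdisc that drops packets
 * outside its own enqueue/dequeue path (for instance when a limit is
 * lowered) must propagate the change so ancestor qlen/backlog counters
 * stay in sync; new_limit is a hypothetical parameter:
 *
 *	unsigned int dropped = 0, dropped_len = 0;
 *
 *	while (sch->q.qlen > new_limit) {
 *		struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
 *
 *		dropped_len += qdisc_pkt_len(skb);
 *		dropped++;
 *		rtnl_kfree_skbs(skb, skb);
 *	}
 *	qdisc_tree_reduce_backlog(sch, dropped, dropped_len);
 */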

int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
			      void *type_data)
{
	struct net_device *dev = qdisc_dev(sch);
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;
	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}
EXPORT_SYMBOL(qdisc_offload_dump_helper);

void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
				struct Qdisc *new, struct Qdisc *old,
				enum tc_setup_type type, void *type_data,
				struct netlink_ext_ack *extack)
{
	bool any_qdisc_is_offloaded;
	int err;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);

	/* Don't report error if the graft is part of destroy operation. */
	if (!err || !new || new == &noop_qdisc)
		return;

	/* Don't report error if the parent, the old child and the new
	 * one are not offloaded.
	 */
	any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;

	if (any_qdisc_is_offloaded)
		NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
}
EXPORT_SYMBOL(qdisc_offload_graft_helper);

static void qdisc_offload_graft_root(struct net_device *dev,
				     struct Qdisc *new, struct Qdisc *old,
				     struct netlink_ext_ack *extack)
{
	struct tc_root_qopt_offload graft_offload = {
		.command	= TC_ROOT_GRAFT,
		.handle		= new ? new->handle : 0,
		.ingress	= (new && new->flags & TCQ_F_INGRESS) ||
				  (old && old->flags & TCQ_F_INGRESS),
	};

	qdisc_offload_graft_helper(dev, NULL, new, old,
				   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
}

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	u32 block_index;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	if (q->flags & TCQ_F_BUILTIN)
		return true;
	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
		return true;

	return false;
}

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new);

	if (old)
		qdisc_put(old);
}

static void qdisc_clear_nolock(struct Qdisc *sch)
{
	sch->flags &= ~TCQ_F_NOLOCK;
	if (!(sch->flags & TCQ_F_CPUSTATS))
		return;

	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	sch->cpu_bstats = NULL;
	sch->cpu_qstats = NULL;
	sch->flags &= ~TCQ_F_CPUSTATS;
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev)) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			if (new && i > 0)
				qdisc_refcount_inc(new);

			if (!ingress)
				qdisc_put(old);
		}

skip:
		if (!ingress) {
			old = rtnl_dereference(dev->qdisc);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);

			notify_and_destroy(net, skb, n, classid, old, new);

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
			qdisc_clear_nolock(new);

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new);
	}
	return 0;
}
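
/* Both branches above are exercised by ordinary tc(8) commands.
 * Grafting at the device root (parent == NULL path):
 *
 *	# tc qdisc replace dev eth0 root handle 1: htb
 *
 * Grafting into a class of an existing qdisc (cops->graft path):
 *
 *	# tc qdisc add dev eth0 parent 1:10 handle 20: sfq
 */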
1124*4882a593Smuzhiyun 
qdisc_block_indexes_set(struct Qdisc * sch,struct nlattr ** tca,struct netlink_ext_ack * extack)1125*4882a593Smuzhiyun static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1126*4882a593Smuzhiyun 				   struct netlink_ext_ack *extack)
1127*4882a593Smuzhiyun {
1128*4882a593Smuzhiyun 	u32 block_index;
1129*4882a593Smuzhiyun 
1130*4882a593Smuzhiyun 	if (tca[TCA_INGRESS_BLOCK]) {
1131*4882a593Smuzhiyun 		block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1132*4882a593Smuzhiyun 
1133*4882a593Smuzhiyun 		if (!block_index) {
1134*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1135*4882a593Smuzhiyun 			return -EINVAL;
1136*4882a593Smuzhiyun 		}
1137*4882a593Smuzhiyun 		if (!sch->ops->ingress_block_set) {
1138*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1139*4882a593Smuzhiyun 			return -EOPNOTSUPP;
1140*4882a593Smuzhiyun 		}
1141*4882a593Smuzhiyun 		sch->ops->ingress_block_set(sch, block_index);
1142*4882a593Smuzhiyun 	}
1143*4882a593Smuzhiyun 	if (tca[TCA_EGRESS_BLOCK]) {
1144*4882a593Smuzhiyun 		block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1145*4882a593Smuzhiyun 
1146*4882a593Smuzhiyun 		if (!block_index) {
1147*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1148*4882a593Smuzhiyun 			return -EINVAL;
1149*4882a593Smuzhiyun 		}
1150*4882a593Smuzhiyun 		if (!sch->ops->egress_block_set) {
1151*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1152*4882a593Smuzhiyun 			return -EOPNOTSUPP;
1153*4882a593Smuzhiyun 		}
1154*4882a593Smuzhiyun 		sch->ops->egress_block_set(sch, block_index);
1155*4882a593Smuzhiyun 	}
1156*4882a593Smuzhiyun 	return 0;
1157*4882a593Smuzhiyun }
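/* Illustrative note, assuming stock iproute2 syntax: shared filter blocks
 * reach this helper via requests such as
 *
 *	tc qdisc add dev eth0 ingress_block 22 clsact
 *
 * which carries TCA_INGRESS_BLOCK == 22; the helper only validates the
 * index and hands it to the qdisc's ->ingress_block_set() callback, so
 * qdiscs that lack the callback reject block sharing with -EOPNOTSUPP.
 */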
1158*4882a593Smuzhiyun 
1159*4882a593Smuzhiyun /*
1160*4882a593Smuzhiyun    Allocate and initialize new qdisc.
1161*4882a593Smuzhiyun 
1162*4882a593Smuzhiyun    Parameters are passed via opt.
1163*4882a593Smuzhiyun  */
1164*4882a593Smuzhiyun 
1165*4882a593Smuzhiyun static struct Qdisc *qdisc_create(struct net_device *dev,
1166*4882a593Smuzhiyun 				  struct netdev_queue *dev_queue,
1167*4882a593Smuzhiyun 				  struct Qdisc *p, u32 parent, u32 handle,
1168*4882a593Smuzhiyun 				  struct nlattr **tca, int *errp,
1169*4882a593Smuzhiyun 				  struct netlink_ext_ack *extack)
1170*4882a593Smuzhiyun {
1171*4882a593Smuzhiyun 	int err;
1172*4882a593Smuzhiyun 	struct nlattr *kind = tca[TCA_KIND];
1173*4882a593Smuzhiyun 	struct Qdisc *sch;
1174*4882a593Smuzhiyun 	struct Qdisc_ops *ops;
1175*4882a593Smuzhiyun 	struct qdisc_size_table *stab;
1176*4882a593Smuzhiyun 
1177*4882a593Smuzhiyun 	ops = qdisc_lookup_ops(kind);
1178*4882a593Smuzhiyun #ifdef CONFIG_MODULES
1179*4882a593Smuzhiyun 	if (ops == NULL && kind != NULL) {
1180*4882a593Smuzhiyun 		char name[IFNAMSIZ];
1181*4882a593Smuzhiyun 		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
1182*4882a593Smuzhiyun 			/* We dropped the RTNL semaphore in order to
1183*4882a593Smuzhiyun 			 * perform the module load.  So, even if we
1184*4882a593Smuzhiyun 			 * succeeded in loading the module we have to
1185*4882a593Smuzhiyun 			 * tell the caller to replay the request.  We
1186*4882a593Smuzhiyun 			 * indicate this using -EAGAIN.
1187*4882a593Smuzhiyun 			 * We replay the request because the device may
1188*4882a593Smuzhiyun 			 * go away in the meantime.
1189*4882a593Smuzhiyun 			 */
1190*4882a593Smuzhiyun 			rtnl_unlock();
1191*4882a593Smuzhiyun 			request_module("sch_%s", name);
1192*4882a593Smuzhiyun 			rtnl_lock();
1193*4882a593Smuzhiyun 			ops = qdisc_lookup_ops(kind);
1194*4882a593Smuzhiyun 			if (ops != NULL) {
1195*4882a593Smuzhiyun 			/* We will call qdisc_lookup_ops() again on replay,
1196*4882a593Smuzhiyun 			 * so don't keep a reference.
1197*4882a593Smuzhiyun 				 */
1198*4882a593Smuzhiyun 				module_put(ops->owner);
1199*4882a593Smuzhiyun 				err = -EAGAIN;
1200*4882a593Smuzhiyun 				goto err_out;
1201*4882a593Smuzhiyun 			}
1202*4882a593Smuzhiyun 		}
1203*4882a593Smuzhiyun 	}
1204*4882a593Smuzhiyun #endif
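	/* Illustrative note: the -EAGAIN dance above is what lets e.g.
	 * "tc qdisc add ... sfq" succeed without a preloaded module.  The
	 * first pass drops RTNL, request_module() loads "sch_sfq", and the
	 * whole request is replayed from scratch because the device could
	 * have disappeared while RTNL was released.
	 */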
1205*4882a593Smuzhiyun 
1206*4882a593Smuzhiyun 	err = -ENOENT;
1207*4882a593Smuzhiyun 	if (!ops) {
1208*4882a593Smuzhiyun 		NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
1209*4882a593Smuzhiyun 		goto err_out;
1210*4882a593Smuzhiyun 	}
1211*4882a593Smuzhiyun 
1212*4882a593Smuzhiyun 	sch = qdisc_alloc(dev_queue, ops, extack);
1213*4882a593Smuzhiyun 	if (IS_ERR(sch)) {
1214*4882a593Smuzhiyun 		err = PTR_ERR(sch);
1215*4882a593Smuzhiyun 		goto err_out2;
1216*4882a593Smuzhiyun 	}
1217*4882a593Smuzhiyun 
1218*4882a593Smuzhiyun 	sch->parent = parent;
1219*4882a593Smuzhiyun 
1220*4882a593Smuzhiyun 	if (handle == TC_H_INGRESS) {
1221*4882a593Smuzhiyun 		sch->flags |= TCQ_F_INGRESS;
1222*4882a593Smuzhiyun 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
1223*4882a593Smuzhiyun 	} else {
1224*4882a593Smuzhiyun 		if (handle == 0) {
1225*4882a593Smuzhiyun 			handle = qdisc_alloc_handle(dev);
1226*4882a593Smuzhiyun 			if (handle == 0) {
1227*4882a593Smuzhiyun 				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
1228*4882a593Smuzhiyun 				err = -ENOSPC;
1229*4882a593Smuzhiyun 				goto err_out3;
1230*4882a593Smuzhiyun 			}
1231*4882a593Smuzhiyun 		}
1232*4882a593Smuzhiyun 		if (!netif_is_multiqueue(dev))
1233*4882a593Smuzhiyun 			sch->flags |= TCQ_F_ONETXQUEUE;
1234*4882a593Smuzhiyun 	}
1235*4882a593Smuzhiyun 
1236*4882a593Smuzhiyun 	sch->handle = handle;
1237*4882a593Smuzhiyun 
1238*4882a593Smuzhiyun 	/* This exists to keep backward compatibility with a userspace
1239*4882a593Smuzhiyun 	 * loophole that allowed userspace to get the IFF_NO_QUEUE
1240*4882a593Smuzhiyun 	 * facility on older kernels by setting tx_queue_len=0 (prior
1241*4882a593Smuzhiyun 	 * to qdisc init) and then forgetting to reinit tx_queue_len
1242*4882a593Smuzhiyun 	 * before attaching a qdisc again.
1243*4882a593Smuzhiyun 	 */
1244*4882a593Smuzhiyun 	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1245*4882a593Smuzhiyun 		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1246*4882a593Smuzhiyun 		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1247*4882a593Smuzhiyun 	}
1248*4882a593Smuzhiyun 
1249*4882a593Smuzhiyun 	err = qdisc_block_indexes_set(sch, tca, extack);
1250*4882a593Smuzhiyun 	if (err)
1251*4882a593Smuzhiyun 		goto err_out3;
1252*4882a593Smuzhiyun 
1253*4882a593Smuzhiyun 	if (ops->init) {
1254*4882a593Smuzhiyun 		err = ops->init(sch, tca[TCA_OPTIONS], extack);
1255*4882a593Smuzhiyun 		if (err != 0)
1256*4882a593Smuzhiyun 			goto err_out5;
1257*4882a593Smuzhiyun 	}
1258*4882a593Smuzhiyun 
1259*4882a593Smuzhiyun 	if (tca[TCA_STAB]) {
1260*4882a593Smuzhiyun 		stab = qdisc_get_stab(tca[TCA_STAB], extack);
1261*4882a593Smuzhiyun 		if (IS_ERR(stab)) {
1262*4882a593Smuzhiyun 			err = PTR_ERR(stab);
1263*4882a593Smuzhiyun 			goto err_out4;
1264*4882a593Smuzhiyun 		}
1265*4882a593Smuzhiyun 		rcu_assign_pointer(sch->stab, stab);
1266*4882a593Smuzhiyun 	}
1267*4882a593Smuzhiyun 	if (tca[TCA_RATE]) {
1268*4882a593Smuzhiyun 		seqcount_t *running;
1269*4882a593Smuzhiyun 
1270*4882a593Smuzhiyun 		err = -EOPNOTSUPP;
1271*4882a593Smuzhiyun 		if (sch->flags & TCQ_F_MQROOT) {
1272*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
1273*4882a593Smuzhiyun 			goto err_out4;
1274*4882a593Smuzhiyun 		}
1275*4882a593Smuzhiyun 
1276*4882a593Smuzhiyun 		if (sch->parent != TC_H_ROOT &&
1277*4882a593Smuzhiyun 		    !(sch->flags & TCQ_F_INGRESS) &&
1278*4882a593Smuzhiyun 		    (!p || !(p->flags & TCQ_F_MQROOT)))
1279*4882a593Smuzhiyun 			running = qdisc_root_sleeping_running(sch);
1280*4882a593Smuzhiyun 		else
1281*4882a593Smuzhiyun 			running = &sch->running;
1282*4882a593Smuzhiyun 
1283*4882a593Smuzhiyun 		err = gen_new_estimator(&sch->bstats,
1284*4882a593Smuzhiyun 					sch->cpu_bstats,
1285*4882a593Smuzhiyun 					&sch->rate_est,
1286*4882a593Smuzhiyun 					NULL,
1287*4882a593Smuzhiyun 					running,
1288*4882a593Smuzhiyun 					tca[TCA_RATE]);
1289*4882a593Smuzhiyun 		if (err) {
1290*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
1291*4882a593Smuzhiyun 			goto err_out4;
1292*4882a593Smuzhiyun 		}
1293*4882a593Smuzhiyun 	}
1294*4882a593Smuzhiyun 
1295*4882a593Smuzhiyun 	qdisc_hash_add(sch, false);
1296*4882a593Smuzhiyun 	trace_qdisc_create(ops, dev, parent);
1297*4882a593Smuzhiyun 
1298*4882a593Smuzhiyun 	return sch;
1299*4882a593Smuzhiyun 
1300*4882a593Smuzhiyun err_out5:
1301*4882a593Smuzhiyun 	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1302*4882a593Smuzhiyun 	if (ops->destroy)
1303*4882a593Smuzhiyun 		ops->destroy(sch);
1304*4882a593Smuzhiyun err_out3:
1305*4882a593Smuzhiyun 	dev_put(dev);
1306*4882a593Smuzhiyun 	qdisc_free(sch);
1307*4882a593Smuzhiyun err_out2:
1308*4882a593Smuzhiyun 	module_put(ops->owner);
1309*4882a593Smuzhiyun err_out:
1310*4882a593Smuzhiyun 	*errp = err;
1311*4882a593Smuzhiyun 	return NULL;
1312*4882a593Smuzhiyun 
1313*4882a593Smuzhiyun err_out4:
1314*4882a593Smuzhiyun 	/*
1315*4882a593Smuzhiyun 	 * Any broken qdiscs that would require an ops->reset() here?
1316*4882a593Smuzhiyun 	 * The qdisc was never in action so it shouldn't be necessary.
1317*4882a593Smuzhiyun 	 */
1318*4882a593Smuzhiyun 	qdisc_put_stab(rtnl_dereference(sch->stab));
1319*4882a593Smuzhiyun 	if (ops->destroy)
1320*4882a593Smuzhiyun 		ops->destroy(sch);
1321*4882a593Smuzhiyun 	goto err_out3;
1322*4882a593Smuzhiyun }
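/* Illustrative sketch of the handle encoding used above, assuming the
 * standard <linux/pkt_sched.h> macros:
 *
 *	u32 h = TC_H_MAKE(0x8001U << 16, 0);	// tc's "handle 8001:"
 *	// TC_H_MAJ(h) == 0x80010000, TC_H_MIN(h) == 0
 *
 * handle == 0 asks qdisc_alloc_handle() to pick an unused major, while the
 * special TC_H_INGRESS value marks the ingress qdisc instead.
 */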
1323*4882a593Smuzhiyun 
1324*4882a593Smuzhiyun static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1325*4882a593Smuzhiyun 			struct netlink_ext_ack *extack)
1326*4882a593Smuzhiyun {
1327*4882a593Smuzhiyun 	struct qdisc_size_table *ostab, *stab = NULL;
1328*4882a593Smuzhiyun 	int err = 0;
1329*4882a593Smuzhiyun 
1330*4882a593Smuzhiyun 	if (tca[TCA_OPTIONS]) {
1331*4882a593Smuzhiyun 		if (!sch->ops->change) {
1332*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1333*4882a593Smuzhiyun 			return -EINVAL;
1334*4882a593Smuzhiyun 		}
1335*4882a593Smuzhiyun 		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1336*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1337*4882a593Smuzhiyun 			return -EOPNOTSUPP;
1338*4882a593Smuzhiyun 		}
1339*4882a593Smuzhiyun 		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1340*4882a593Smuzhiyun 		if (err)
1341*4882a593Smuzhiyun 			return err;
1342*4882a593Smuzhiyun 	}
1343*4882a593Smuzhiyun 
1344*4882a593Smuzhiyun 	if (tca[TCA_STAB]) {
1345*4882a593Smuzhiyun 		stab = qdisc_get_stab(tca[TCA_STAB], extack);
1346*4882a593Smuzhiyun 		if (IS_ERR(stab))
1347*4882a593Smuzhiyun 			return PTR_ERR(stab);
1348*4882a593Smuzhiyun 	}
1349*4882a593Smuzhiyun 
1350*4882a593Smuzhiyun 	ostab = rtnl_dereference(sch->stab);
1351*4882a593Smuzhiyun 	rcu_assign_pointer(sch->stab, stab);
1352*4882a593Smuzhiyun 	qdisc_put_stab(ostab);
1353*4882a593Smuzhiyun 
1354*4882a593Smuzhiyun 	if (tca[TCA_RATE]) {
1355*4882a593Smuzhiyun 		/* NB: ignores errors from replace_estimator
1356*4882a593Smuzhiyun 		 * because change can't be undone. */
1357*4882a593Smuzhiyun 		if (sch->flags & TCQ_F_MQROOT)
1358*4882a593Smuzhiyun 			goto out;
1359*4882a593Smuzhiyun 		gen_replace_estimator(&sch->bstats,
1360*4882a593Smuzhiyun 				      sch->cpu_bstats,
1361*4882a593Smuzhiyun 				      &sch->rate_est,
1362*4882a593Smuzhiyun 				      NULL,
1363*4882a593Smuzhiyun 				      qdisc_root_sleeping_running(sch),
1364*4882a593Smuzhiyun 				      tca[TCA_RATE]);
1365*4882a593Smuzhiyun 	}
1366*4882a593Smuzhiyun out:
1367*4882a593Smuzhiyun 	return 0;
1368*4882a593Smuzhiyun }
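/* Illustrative note, with hypothetical values: a request like
 *
 *	tc qdisc change dev eth0 root sfq perturb 10
 *
 * keeps the existing qdisc instance and re-parses TCA_OPTIONS through the
 * qdisc's ->change() above, while TCA_STAB and TCA_RATE are swapped
 * generically; estimator errors are deliberately ignored because the
 * already-applied option change cannot be rolled back.
 */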
1369*4882a593Smuzhiyun 
1370*4882a593Smuzhiyun struct check_loop_arg {
1371*4882a593Smuzhiyun 	struct qdisc_walker	w;
1372*4882a593Smuzhiyun 	struct Qdisc		*p;
1373*4882a593Smuzhiyun 	int			depth;
1374*4882a593Smuzhiyun };
1375*4882a593Smuzhiyun 
1376*4882a593Smuzhiyun static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1377*4882a593Smuzhiyun 			 struct qdisc_walker *w);
1378*4882a593Smuzhiyun 
1379*4882a593Smuzhiyun static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1380*4882a593Smuzhiyun {
1381*4882a593Smuzhiyun 	struct check_loop_arg	arg;
1382*4882a593Smuzhiyun 
1383*4882a593Smuzhiyun 	if (q->ops->cl_ops == NULL)
1384*4882a593Smuzhiyun 		return 0;
1385*4882a593Smuzhiyun 
1386*4882a593Smuzhiyun 	arg.w.stop = arg.w.skip = arg.w.count = 0;
1387*4882a593Smuzhiyun 	arg.w.fn = check_loop_fn;
1388*4882a593Smuzhiyun 	arg.depth = depth;
1389*4882a593Smuzhiyun 	arg.p = p;
1390*4882a593Smuzhiyun 	q->ops->cl_ops->walk(q, &arg.w);
1391*4882a593Smuzhiyun 	return arg.w.stop ? -ELOOP : 0;
1392*4882a593Smuzhiyun }
1393*4882a593Smuzhiyun 
1394*4882a593Smuzhiyun static int
1395*4882a593Smuzhiyun check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1396*4882a593Smuzhiyun {
1397*4882a593Smuzhiyun 	struct Qdisc *leaf;
1398*4882a593Smuzhiyun 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1399*4882a593Smuzhiyun 	struct check_loop_arg *arg = (struct check_loop_arg *)w;
1400*4882a593Smuzhiyun 
1401*4882a593Smuzhiyun 	leaf = cops->leaf(q, cl);
1402*4882a593Smuzhiyun 	if (leaf) {
1403*4882a593Smuzhiyun 		if (leaf == arg->p || arg->depth > 7)
1404*4882a593Smuzhiyun 			return -ELOOP;
1405*4882a593Smuzhiyun 		return check_loop(leaf, arg->p, arg->depth + 1);
1406*4882a593Smuzhiyun 	}
1407*4882a593Smuzhiyun 	return 0;
1408*4882a593Smuzhiyun }
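/* Illustrative note: check_loop() walks the classes of the qdisc that is
 * about to be grafted and recurses into each leaf, looking for the
 * prospective parent "p" among the descendants.  Re-grafting 1: beneath
 * its own child 1:10, say, would find p and fail with -ELOOP, and trees
 * deeper than 8 levels are rejected through the depth > 7 check.
 */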
1409*4882a593Smuzhiyun 
1410*4882a593Smuzhiyun const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
1411*4882a593Smuzhiyun 	[TCA_KIND]		= { .type = NLA_STRING },
1412*4882a593Smuzhiyun 	[TCA_RATE]		= { .type = NLA_BINARY,
1413*4882a593Smuzhiyun 				    .len = sizeof(struct tc_estimator) },
1414*4882a593Smuzhiyun 	[TCA_STAB]		= { .type = NLA_NESTED },
1415*4882a593Smuzhiyun 	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
1416*4882a593Smuzhiyun 	[TCA_CHAIN]		= { .type = NLA_U32 },
1417*4882a593Smuzhiyun 	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
1418*4882a593Smuzhiyun 	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
1419*4882a593Smuzhiyun };
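/* Illustrative note: for NLA_BINARY the .len above acts as an upper bound
 * on the attribute payload, so an oversized TCA_RATE is already rejected
 * by nlmsg_parse_deprecated(); the minimum-size check on the estimator
 * config happens later, in gen_new_estimator() (an assumption based on
 * generic netlink policy semantics, not spelled out in this file).
 */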
1420*4882a593Smuzhiyun 
1421*4882a593Smuzhiyun /*
1422*4882a593Smuzhiyun  * Delete/get qdisc.
1423*4882a593Smuzhiyun  */
1424*4882a593Smuzhiyun 
1425*4882a593Smuzhiyun static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1426*4882a593Smuzhiyun 			struct netlink_ext_ack *extack)
1427*4882a593Smuzhiyun {
1428*4882a593Smuzhiyun 	struct net *net = sock_net(skb->sk);
1429*4882a593Smuzhiyun 	struct tcmsg *tcm = nlmsg_data(n);
1430*4882a593Smuzhiyun 	struct nlattr *tca[TCA_MAX + 1];
1431*4882a593Smuzhiyun 	struct net_device *dev;
1432*4882a593Smuzhiyun 	u32 clid;
1433*4882a593Smuzhiyun 	struct Qdisc *q = NULL;
1434*4882a593Smuzhiyun 	struct Qdisc *p = NULL;
1435*4882a593Smuzhiyun 	int err;
1436*4882a593Smuzhiyun 
1437*4882a593Smuzhiyun 	if ((n->nlmsg_type != RTM_GETQDISC) &&
1438*4882a593Smuzhiyun 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1439*4882a593Smuzhiyun 		return -EPERM;
1440*4882a593Smuzhiyun 
1441*4882a593Smuzhiyun 	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1442*4882a593Smuzhiyun 				     rtm_tca_policy, extack);
1443*4882a593Smuzhiyun 	if (err < 0)
1444*4882a593Smuzhiyun 		return err;
1445*4882a593Smuzhiyun 
1446*4882a593Smuzhiyun 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1447*4882a593Smuzhiyun 	if (!dev)
1448*4882a593Smuzhiyun 		return -ENODEV;
1449*4882a593Smuzhiyun 
1450*4882a593Smuzhiyun 	clid = tcm->tcm_parent;
1451*4882a593Smuzhiyun 	if (clid) {
1452*4882a593Smuzhiyun 		if (clid != TC_H_ROOT) {
1453*4882a593Smuzhiyun 			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1454*4882a593Smuzhiyun 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1455*4882a593Smuzhiyun 				if (!p) {
1456*4882a593Smuzhiyun 					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1457*4882a593Smuzhiyun 					return -ENOENT;
1458*4882a593Smuzhiyun 				}
1459*4882a593Smuzhiyun 				q = qdisc_leaf(p, clid);
1460*4882a593Smuzhiyun 			} else if (dev_ingress_queue(dev)) {
1461*4882a593Smuzhiyun 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1462*4882a593Smuzhiyun 			}
1463*4882a593Smuzhiyun 		} else {
1464*4882a593Smuzhiyun 			q = rtnl_dereference(dev->qdisc);
1465*4882a593Smuzhiyun 		}
1466*4882a593Smuzhiyun 		if (!q) {
1467*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1468*4882a593Smuzhiyun 			return -ENOENT;
1469*4882a593Smuzhiyun 		}
1470*4882a593Smuzhiyun 
1471*4882a593Smuzhiyun 		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1472*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack, "Invalid handle");
1473*4882a593Smuzhiyun 			return -EINVAL;
1474*4882a593Smuzhiyun 		}
1475*4882a593Smuzhiyun 	} else {
1476*4882a593Smuzhiyun 		q = qdisc_lookup(dev, tcm->tcm_handle);
1477*4882a593Smuzhiyun 		if (!q) {
1478*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1479*4882a593Smuzhiyun 			return -ENOENT;
1480*4882a593Smuzhiyun 		}
1481*4882a593Smuzhiyun 	}
1482*4882a593Smuzhiyun 
1483*4882a593Smuzhiyun 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1484*4882a593Smuzhiyun 		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1485*4882a593Smuzhiyun 		return -EINVAL;
1486*4882a593Smuzhiyun 	}
1487*4882a593Smuzhiyun 
1488*4882a593Smuzhiyun 	if (n->nlmsg_type == RTM_DELQDISC) {
1489*4882a593Smuzhiyun 		if (!clid) {
1490*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1491*4882a593Smuzhiyun 			return -EINVAL;
1492*4882a593Smuzhiyun 		}
1493*4882a593Smuzhiyun 		if (q->handle == 0) {
1494*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1495*4882a593Smuzhiyun 			return -ENOENT;
1496*4882a593Smuzhiyun 		}
1497*4882a593Smuzhiyun 		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
1498*4882a593Smuzhiyun 		if (err != 0)
1499*4882a593Smuzhiyun 			return err;
1500*4882a593Smuzhiyun 	} else {
1501*4882a593Smuzhiyun 		qdisc_notify(net, skb, n, clid, NULL, q);
1502*4882a593Smuzhiyun 	}
1503*4882a593Smuzhiyun 	return 0;
1504*4882a593Smuzhiyun }
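/* Illustrative note, assuming stock iproute2: "tc qdisc del dev eth0 root"
 * arrives here as RTM_DELQDISC with tcm_parent == TC_H_ROOT and
 * tcm_handle == 0, so the root qdisc is found via dev->qdisc and detached
 * through qdisc_graft(..., new = NULL, old = q, ...).
 */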
1505*4882a593Smuzhiyun 
1506*4882a593Smuzhiyun /*
1507*4882a593Smuzhiyun  * Create/change qdisc.
1508*4882a593Smuzhiyun  */
1509*4882a593Smuzhiyun 
1510*4882a593Smuzhiyun static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1511*4882a593Smuzhiyun 			   struct netlink_ext_ack *extack)
1512*4882a593Smuzhiyun {
1513*4882a593Smuzhiyun 	struct net *net = sock_net(skb->sk);
1514*4882a593Smuzhiyun 	struct tcmsg *tcm;
1515*4882a593Smuzhiyun 	struct nlattr *tca[TCA_MAX + 1];
1516*4882a593Smuzhiyun 	struct net_device *dev;
1517*4882a593Smuzhiyun 	u32 clid;
1518*4882a593Smuzhiyun 	struct Qdisc *q, *p;
1519*4882a593Smuzhiyun 	int err;
1520*4882a593Smuzhiyun 
1521*4882a593Smuzhiyun 	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1522*4882a593Smuzhiyun 		return -EPERM;
1523*4882a593Smuzhiyun 
1524*4882a593Smuzhiyun replay:
1525*4882a593Smuzhiyun 	/* Reinit, just in case something touches this. */
1526*4882a593Smuzhiyun 	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1527*4882a593Smuzhiyun 				     rtm_tca_policy, extack);
1528*4882a593Smuzhiyun 	if (err < 0)
1529*4882a593Smuzhiyun 		return err;
1530*4882a593Smuzhiyun 
1531*4882a593Smuzhiyun 	tcm = nlmsg_data(n);
1532*4882a593Smuzhiyun 	clid = tcm->tcm_parent;
1533*4882a593Smuzhiyun 	q = p = NULL;
1534*4882a593Smuzhiyun 
1535*4882a593Smuzhiyun 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1536*4882a593Smuzhiyun 	if (!dev)
1537*4882a593Smuzhiyun 		return -ENODEV;
1538*4882a593Smuzhiyun 
1539*4882a593Smuzhiyun 
1540*4882a593Smuzhiyun 	if (clid) {
1541*4882a593Smuzhiyun 		if (clid != TC_H_ROOT) {
1542*4882a593Smuzhiyun 			if (clid != TC_H_INGRESS) {
1543*4882a593Smuzhiyun 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1544*4882a593Smuzhiyun 				if (!p) {
1545*4882a593Smuzhiyun 					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1546*4882a593Smuzhiyun 					return -ENOENT;
1547*4882a593Smuzhiyun 				}
1548*4882a593Smuzhiyun 				q = qdisc_leaf(p, clid);
1549*4882a593Smuzhiyun 			} else if (dev_ingress_queue_create(dev)) {
1550*4882a593Smuzhiyun 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1551*4882a593Smuzhiyun 			}
1552*4882a593Smuzhiyun 		} else {
1553*4882a593Smuzhiyun 			q = rtnl_dereference(dev->qdisc);
1554*4882a593Smuzhiyun 		}
1555*4882a593Smuzhiyun 
1556*4882a593Smuzhiyun 		/* It may be the default qdisc; ignore it */
1557*4882a593Smuzhiyun 		if (q && q->handle == 0)
1558*4882a593Smuzhiyun 			q = NULL;
1559*4882a593Smuzhiyun 
1560*4882a593Smuzhiyun 		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1561*4882a593Smuzhiyun 			if (tcm->tcm_handle) {
1562*4882a593Smuzhiyun 				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1563*4882a593Smuzhiyun 					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1564*4882a593Smuzhiyun 					return -EEXIST;
1565*4882a593Smuzhiyun 				}
1566*4882a593Smuzhiyun 				if (TC_H_MIN(tcm->tcm_handle)) {
1567*4882a593Smuzhiyun 					NL_SET_ERR_MSG(extack, "Invalid minor handle");
1568*4882a593Smuzhiyun 					return -EINVAL;
1569*4882a593Smuzhiyun 				}
1570*4882a593Smuzhiyun 				q = qdisc_lookup(dev, tcm->tcm_handle);
1571*4882a593Smuzhiyun 				if (!q)
1572*4882a593Smuzhiyun 					goto create_n_graft;
1573*4882a593Smuzhiyun 				if (n->nlmsg_flags & NLM_F_EXCL) {
1574*4882a593Smuzhiyun 					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1575*4882a593Smuzhiyun 					return -EEXIST;
1576*4882a593Smuzhiyun 				}
1577*4882a593Smuzhiyun 				if (tca[TCA_KIND] &&
1578*4882a593Smuzhiyun 				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1579*4882a593Smuzhiyun 					NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1580*4882a593Smuzhiyun 					return -EINVAL;
1581*4882a593Smuzhiyun 				}
1582*4882a593Smuzhiyun 				if (q == p ||
1583*4882a593Smuzhiyun 				    (p && check_loop(q, p, 0))) {
1584*4882a593Smuzhiyun 					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1585*4882a593Smuzhiyun 					return -ELOOP;
1586*4882a593Smuzhiyun 				}
1587*4882a593Smuzhiyun 				qdisc_refcount_inc(q);
1588*4882a593Smuzhiyun 				goto graft;
1589*4882a593Smuzhiyun 			} else {
1590*4882a593Smuzhiyun 				if (!q)
1591*4882a593Smuzhiyun 					goto create_n_graft;
1592*4882a593Smuzhiyun 
1593*4882a593Smuzhiyun 				/* This magic test requires explanation.
1594*4882a593Smuzhiyun 				 *
1595*4882a593Smuzhiyun 				 *   We know that some child q is already
1596*4882a593Smuzhiyun 				 *   attached to this parent and we have a choice:
1597*4882a593Smuzhiyun 				 *   either change it or create/graft a new one.
1598*4882a593Smuzhiyun 				 *
1599*4882a593Smuzhiyun 				 *   1. We are allowed to create/graft only
1600*4882a593Smuzhiyun 				 *   if both CREATE and REPLACE flags are set.
1601*4882a593Smuzhiyun 				 *
1602*4882a593Smuzhiyun 				 *   2. If EXCL is set, the requestor meant that
1603*4882a593Smuzhiyun 				 *   a qdisc with tcm_handle is not expected
1604*4882a593Smuzhiyun 				 *   to exist, so we choose create/graft too.
1605*4882a593Smuzhiyun 				 *
1606*4882a593Smuzhiyun 				 *   3. The last case is when no flags are set.
1607*4882a593Smuzhiyun 				 *   Alas, it is sort of a hole in the API; we
1608*4882a593Smuzhiyun 				 *   cannot decide what to do unambiguously.
1609*4882a593Smuzhiyun 				 *   For now we select create/graft if the
1610*4882a593Smuzhiyun 				 *   user gave a KIND that does not match the existing one.
1611*4882a593Smuzhiyun 				 */
1612*4882a593Smuzhiyun 				if ((n->nlmsg_flags & NLM_F_CREATE) &&
1613*4882a593Smuzhiyun 				    (n->nlmsg_flags & NLM_F_REPLACE) &&
1614*4882a593Smuzhiyun 				    ((n->nlmsg_flags & NLM_F_EXCL) ||
1615*4882a593Smuzhiyun 				     (tca[TCA_KIND] &&
1616*4882a593Smuzhiyun 				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
1617*4882a593Smuzhiyun 					goto create_n_graft;
1618*4882a593Smuzhiyun 			}
1619*4882a593Smuzhiyun 		}
1620*4882a593Smuzhiyun 	} else {
1621*4882a593Smuzhiyun 		if (!tcm->tcm_handle) {
1622*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1623*4882a593Smuzhiyun 			return -EINVAL;
1624*4882a593Smuzhiyun 		}
1625*4882a593Smuzhiyun 		q = qdisc_lookup(dev, tcm->tcm_handle);
1626*4882a593Smuzhiyun 	}
1627*4882a593Smuzhiyun 
1628*4882a593Smuzhiyun 	/* Change qdisc parameters */
1629*4882a593Smuzhiyun 	if (!q) {
1630*4882a593Smuzhiyun 		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1631*4882a593Smuzhiyun 		return -ENOENT;
1632*4882a593Smuzhiyun 	}
1633*4882a593Smuzhiyun 	if (n->nlmsg_flags & NLM_F_EXCL) {
1634*4882a593Smuzhiyun 		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1635*4882a593Smuzhiyun 		return -EEXIST;
1636*4882a593Smuzhiyun 	}
1637*4882a593Smuzhiyun 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1638*4882a593Smuzhiyun 		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1639*4882a593Smuzhiyun 		return -EINVAL;
1640*4882a593Smuzhiyun 	}
1641*4882a593Smuzhiyun 	err = qdisc_change(q, tca, extack);
1642*4882a593Smuzhiyun 	if (err == 0)
1643*4882a593Smuzhiyun 		qdisc_notify(net, skb, n, clid, NULL, q);
1644*4882a593Smuzhiyun 	return err;
1645*4882a593Smuzhiyun 
1646*4882a593Smuzhiyun create_n_graft:
1647*4882a593Smuzhiyun 	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1648*4882a593Smuzhiyun 		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1649*4882a593Smuzhiyun 		return -ENOENT;
1650*4882a593Smuzhiyun 	}
1651*4882a593Smuzhiyun 	if (clid == TC_H_INGRESS) {
1652*4882a593Smuzhiyun 		if (dev_ingress_queue(dev)) {
1653*4882a593Smuzhiyun 			q = qdisc_create(dev, dev_ingress_queue(dev), p,
1654*4882a593Smuzhiyun 					 tcm->tcm_parent, tcm->tcm_parent,
1655*4882a593Smuzhiyun 					 tca, &err, extack);
1656*4882a593Smuzhiyun 		} else {
1657*4882a593Smuzhiyun 			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
1658*4882a593Smuzhiyun 			err = -ENOENT;
1659*4882a593Smuzhiyun 		}
1660*4882a593Smuzhiyun 	} else {
1661*4882a593Smuzhiyun 		struct netdev_queue *dev_queue;
1662*4882a593Smuzhiyun 
1663*4882a593Smuzhiyun 		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1664*4882a593Smuzhiyun 			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1665*4882a593Smuzhiyun 		else if (p)
1666*4882a593Smuzhiyun 			dev_queue = p->dev_queue;
1667*4882a593Smuzhiyun 		else
1668*4882a593Smuzhiyun 			dev_queue = netdev_get_tx_queue(dev, 0);
1669*4882a593Smuzhiyun 
1670*4882a593Smuzhiyun 		q = qdisc_create(dev, dev_queue, p,
1671*4882a593Smuzhiyun 				 tcm->tcm_parent, tcm->tcm_handle,
1672*4882a593Smuzhiyun 				 tca, &err, extack);
1673*4882a593Smuzhiyun 	}
1674*4882a593Smuzhiyun 	if (q == NULL) {
1675*4882a593Smuzhiyun 		if (err == -EAGAIN)
1676*4882a593Smuzhiyun 			goto replay;
1677*4882a593Smuzhiyun 		return err;
1678*4882a593Smuzhiyun 	}
1679*4882a593Smuzhiyun 
1680*4882a593Smuzhiyun graft:
1681*4882a593Smuzhiyun 	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
1682*4882a593Smuzhiyun 	if (err) {
1683*4882a593Smuzhiyun 		if (q)
1684*4882a593Smuzhiyun 			qdisc_put(q);
1685*4882a593Smuzhiyun 		return err;
1686*4882a593Smuzhiyun 	}
1687*4882a593Smuzhiyun 
1688*4882a593Smuzhiyun 	return 0;
1689*4882a593Smuzhiyun }
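/* Illustrative note: the nlmsg flag combinations checked above map onto
 * the tc verbs roughly as follows (iproute2 behaviour, for orientation):
 *
 *	tc qdisc add ...	NLM_F_CREATE | NLM_F_EXCL
 *	tc qdisc replace ...	NLM_F_CREATE | NLM_F_REPLACE
 *	tc qdisc change ...	(no flags)
 *
 * which is exactly the create_n_graft vs. change-in-place split that
 * tc_modify_qdisc() implements.
 */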
1690*4882a593Smuzhiyun 
1691*4882a593Smuzhiyun static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1692*4882a593Smuzhiyun 			      struct netlink_callback *cb,
1693*4882a593Smuzhiyun 			      int *q_idx_p, int s_q_idx, bool recur,
1694*4882a593Smuzhiyun 			      bool dump_invisible)
1695*4882a593Smuzhiyun {
1696*4882a593Smuzhiyun 	int ret = 0, q_idx = *q_idx_p;
1697*4882a593Smuzhiyun 	struct Qdisc *q;
1698*4882a593Smuzhiyun 	int b;
1699*4882a593Smuzhiyun 
1700*4882a593Smuzhiyun 	if (!root)
1701*4882a593Smuzhiyun 		return 0;
1702*4882a593Smuzhiyun 
1703*4882a593Smuzhiyun 	q = root;
1704*4882a593Smuzhiyun 	if (q_idx < s_q_idx) {
1705*4882a593Smuzhiyun 		q_idx++;
1706*4882a593Smuzhiyun 	} else {
1707*4882a593Smuzhiyun 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1708*4882a593Smuzhiyun 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1709*4882a593Smuzhiyun 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1710*4882a593Smuzhiyun 				  RTM_NEWQDISC) <= 0)
1711*4882a593Smuzhiyun 			goto done;
1712*4882a593Smuzhiyun 		q_idx++;
1713*4882a593Smuzhiyun 	}
1714*4882a593Smuzhiyun 
1715*4882a593Smuzhiyun 	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
1716*4882a593Smuzhiyun 	 * itself has already been dumped.
1717*4882a593Smuzhiyun 	 *
1718*4882a593Smuzhiyun 	 * If we've already dumped the top-level (ingress) qdisc above and the global
1719*4882a593Smuzhiyun 	 * qdisc hashtable, we don't want to hit it again.
1720*4882a593Smuzhiyun 	 */
1721*4882a593Smuzhiyun 	if (!qdisc_dev(root) || !recur)
1722*4882a593Smuzhiyun 		goto out;
1723*4882a593Smuzhiyun 
1724*4882a593Smuzhiyun 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1725*4882a593Smuzhiyun 		if (q_idx < s_q_idx) {
1726*4882a593Smuzhiyun 			q_idx++;
1727*4882a593Smuzhiyun 			continue;
1728*4882a593Smuzhiyun 		}
1729*4882a593Smuzhiyun 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1730*4882a593Smuzhiyun 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1731*4882a593Smuzhiyun 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1732*4882a593Smuzhiyun 				  RTM_NEWQDISC) <= 0)
1733*4882a593Smuzhiyun 			goto done;
1734*4882a593Smuzhiyun 		q_idx++;
1735*4882a593Smuzhiyun 	}
1736*4882a593Smuzhiyun 
1737*4882a593Smuzhiyun out:
1738*4882a593Smuzhiyun 	*q_idx_p = q_idx;
1739*4882a593Smuzhiyun 	return ret;
1740*4882a593Smuzhiyun done:
1741*4882a593Smuzhiyun 	ret = -1;
1742*4882a593Smuzhiyun 	goto out;
1743*4882a593Smuzhiyun }
1744*4882a593Smuzhiyun 
1745*4882a593Smuzhiyun static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1746*4882a593Smuzhiyun {
1747*4882a593Smuzhiyun 	struct net *net = sock_net(skb->sk);
1748*4882a593Smuzhiyun 	int idx, q_idx;
1749*4882a593Smuzhiyun 	int s_idx, s_q_idx;
1750*4882a593Smuzhiyun 	struct net_device *dev;
1751*4882a593Smuzhiyun 	const struct nlmsghdr *nlh = cb->nlh;
1752*4882a593Smuzhiyun 	struct nlattr *tca[TCA_MAX + 1];
1753*4882a593Smuzhiyun 	int err;
1754*4882a593Smuzhiyun 
1755*4882a593Smuzhiyun 	s_idx = cb->args[0];
1756*4882a593Smuzhiyun 	s_q_idx = q_idx = cb->args[1];
1757*4882a593Smuzhiyun 
1758*4882a593Smuzhiyun 	idx = 0;
1759*4882a593Smuzhiyun 	ASSERT_RTNL();
1760*4882a593Smuzhiyun 
1761*4882a593Smuzhiyun 	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1762*4882a593Smuzhiyun 				     rtm_tca_policy, cb->extack);
1763*4882a593Smuzhiyun 	if (err < 0)
1764*4882a593Smuzhiyun 		return err;
1765*4882a593Smuzhiyun 
1766*4882a593Smuzhiyun 	for_each_netdev(net, dev) {
1767*4882a593Smuzhiyun 		struct netdev_queue *dev_queue;
1768*4882a593Smuzhiyun 
1769*4882a593Smuzhiyun 		if (idx < s_idx)
1770*4882a593Smuzhiyun 			goto cont;
1771*4882a593Smuzhiyun 		if (idx > s_idx)
1772*4882a593Smuzhiyun 			s_q_idx = 0;
1773*4882a593Smuzhiyun 		q_idx = 0;
1774*4882a593Smuzhiyun 
1775*4882a593Smuzhiyun 		if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
1776*4882a593Smuzhiyun 				       skb, cb, &q_idx, s_q_idx,
1777*4882a593Smuzhiyun 				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
1778*4882a593Smuzhiyun 			goto done;
1779*4882a593Smuzhiyun 
1780*4882a593Smuzhiyun 		dev_queue = dev_ingress_queue(dev);
1781*4882a593Smuzhiyun 		if (dev_queue &&
1782*4882a593Smuzhiyun 		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1783*4882a593Smuzhiyun 				       &q_idx, s_q_idx, false,
1784*4882a593Smuzhiyun 				       tca[TCA_DUMP_INVISIBLE]) < 0)
1785*4882a593Smuzhiyun 			goto done;
1786*4882a593Smuzhiyun 
1787*4882a593Smuzhiyun cont:
1788*4882a593Smuzhiyun 		idx++;
1789*4882a593Smuzhiyun 	}
1790*4882a593Smuzhiyun 
1791*4882a593Smuzhiyun done:
1792*4882a593Smuzhiyun 	cb->args[0] = idx;
1793*4882a593Smuzhiyun 	cb->args[1] = q_idx;
1794*4882a593Smuzhiyun 
1795*4882a593Smuzhiyun 	return skb->len;
1796*4882a593Smuzhiyun }
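/* Illustrative note: like most rtnetlink dumpers, this one is restartable.
 * cb->args[0] and cb->args[1] persist the device and qdisc positions
 * between recvmsg() calls, so a dump interrupted by a full skb resumes at
 * (s_idx, s_q_idx) instead of starting over.
 */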
1797*4882a593Smuzhiyun 
1798*4882a593Smuzhiyun 
1799*4882a593Smuzhiyun 
1800*4882a593Smuzhiyun /************************************************
1801*4882a593Smuzhiyun  *	Traffic classes manipulation.		*
1802*4882a593Smuzhiyun  ************************************************/
1803*4882a593Smuzhiyun 
1804*4882a593Smuzhiyun static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1805*4882a593Smuzhiyun 			  unsigned long cl,
1806*4882a593Smuzhiyun 			  u32 portid, u32 seq, u16 flags, int event)
1807*4882a593Smuzhiyun {
1808*4882a593Smuzhiyun 	struct tcmsg *tcm;
1809*4882a593Smuzhiyun 	struct nlmsghdr  *nlh;
1810*4882a593Smuzhiyun 	unsigned char *b = skb_tail_pointer(skb);
1811*4882a593Smuzhiyun 	struct gnet_dump d;
1812*4882a593Smuzhiyun 	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1813*4882a593Smuzhiyun 
1814*4882a593Smuzhiyun 	cond_resched();
1815*4882a593Smuzhiyun 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1816*4882a593Smuzhiyun 	if (!nlh)
1817*4882a593Smuzhiyun 		goto out_nlmsg_trim;
1818*4882a593Smuzhiyun 	tcm = nlmsg_data(nlh);
1819*4882a593Smuzhiyun 	tcm->tcm_family = AF_UNSPEC;
1820*4882a593Smuzhiyun 	tcm->tcm__pad1 = 0;
1821*4882a593Smuzhiyun 	tcm->tcm__pad2 = 0;
1822*4882a593Smuzhiyun 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1823*4882a593Smuzhiyun 	tcm->tcm_parent = q->handle;
1824*4882a593Smuzhiyun 	tcm->tcm_handle = q->handle;
1825*4882a593Smuzhiyun 	tcm->tcm_info = 0;
1826*4882a593Smuzhiyun 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1827*4882a593Smuzhiyun 		goto nla_put_failure;
1828*4882a593Smuzhiyun 	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1829*4882a593Smuzhiyun 		goto nla_put_failure;
1830*4882a593Smuzhiyun 
1831*4882a593Smuzhiyun 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1832*4882a593Smuzhiyun 					 NULL, &d, TCA_PAD) < 0)
1833*4882a593Smuzhiyun 		goto nla_put_failure;
1834*4882a593Smuzhiyun 
1835*4882a593Smuzhiyun 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1836*4882a593Smuzhiyun 		goto nla_put_failure;
1837*4882a593Smuzhiyun 
1838*4882a593Smuzhiyun 	if (gnet_stats_finish_copy(&d) < 0)
1839*4882a593Smuzhiyun 		goto nla_put_failure;
1840*4882a593Smuzhiyun 
1841*4882a593Smuzhiyun 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1842*4882a593Smuzhiyun 	return skb->len;
1843*4882a593Smuzhiyun 
1844*4882a593Smuzhiyun out_nlmsg_trim:
1845*4882a593Smuzhiyun nla_put_failure:
1846*4882a593Smuzhiyun 	nlmsg_trim(skb, b);
1847*4882a593Smuzhiyun 	return -1;
1848*4882a593Smuzhiyun }
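/* Illustrative note: a class message built above typically nests TCA_KIND,
 * the class-specific options emitted by cl_ops->dump() (usually under
 * TCA_OPTIONS), and a TCA_STATS2 statistics block; on any overflow the
 * message is trimmed back to "b" so a half-written entry never escapes.
 */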
1849*4882a593Smuzhiyun 
1850*4882a593Smuzhiyun static int tclass_notify(struct net *net, struct sk_buff *oskb,
1851*4882a593Smuzhiyun 			 struct nlmsghdr *n, struct Qdisc *q,
1852*4882a593Smuzhiyun 			 unsigned long cl, int event)
1853*4882a593Smuzhiyun {
1854*4882a593Smuzhiyun 	struct sk_buff *skb;
1855*4882a593Smuzhiyun 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1856*4882a593Smuzhiyun 	int err = 0;
1857*4882a593Smuzhiyun 
1858*4882a593Smuzhiyun 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1859*4882a593Smuzhiyun 	if (!skb)
1860*4882a593Smuzhiyun 		return -ENOBUFS;
1861*4882a593Smuzhiyun 
1862*4882a593Smuzhiyun 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1863*4882a593Smuzhiyun 		kfree_skb(skb);
1864*4882a593Smuzhiyun 		return -EINVAL;
1865*4882a593Smuzhiyun 	}
1866*4882a593Smuzhiyun 
1867*4882a593Smuzhiyun 	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1868*4882a593Smuzhiyun 			     n->nlmsg_flags & NLM_F_ECHO);
1869*4882a593Smuzhiyun 	if (err > 0)
1870*4882a593Smuzhiyun 		err = 0;
1871*4882a593Smuzhiyun 	return err;
1872*4882a593Smuzhiyun }
1873*4882a593Smuzhiyun 
1874*4882a593Smuzhiyun static int tclass_del_notify(struct net *net,
1875*4882a593Smuzhiyun 			     const struct Qdisc_class_ops *cops,
1876*4882a593Smuzhiyun 			     struct sk_buff *oskb, struct nlmsghdr *n,
1877*4882a593Smuzhiyun 			     struct Qdisc *q, unsigned long cl)
1878*4882a593Smuzhiyun {
1879*4882a593Smuzhiyun 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1880*4882a593Smuzhiyun 	struct sk_buff *skb;
1881*4882a593Smuzhiyun 	int err = 0;
1882*4882a593Smuzhiyun 
1883*4882a593Smuzhiyun 	if (!cops->delete)
1884*4882a593Smuzhiyun 		return -EOPNOTSUPP;
1885*4882a593Smuzhiyun 
1886*4882a593Smuzhiyun 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1887*4882a593Smuzhiyun 	if (!skb)
1888*4882a593Smuzhiyun 		return -ENOBUFS;
1889*4882a593Smuzhiyun 
1890*4882a593Smuzhiyun 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1891*4882a593Smuzhiyun 			   RTM_DELTCLASS) < 0) {
1892*4882a593Smuzhiyun 		kfree_skb(skb);
1893*4882a593Smuzhiyun 		return -EINVAL;
1894*4882a593Smuzhiyun 	}
1895*4882a593Smuzhiyun 
1896*4882a593Smuzhiyun 	err = cops->delete(q, cl);
1897*4882a593Smuzhiyun 	if (err) {
1898*4882a593Smuzhiyun 		kfree_skb(skb);
1899*4882a593Smuzhiyun 		return err;
1900*4882a593Smuzhiyun 	}
1901*4882a593Smuzhiyun 
1902*4882a593Smuzhiyun 	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1903*4882a593Smuzhiyun 			     n->nlmsg_flags & NLM_F_ECHO);
1904*4882a593Smuzhiyun 	if (err > 0)
1905*4882a593Smuzhiyun 		err = 0;
1906*4882a593Smuzhiyun 	return err;
1907*4882a593Smuzhiyun }
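/* Illustrative note: the notification skb is filled in *before*
 * cops->delete() runs, both because the class must still exist to be
 * dumped and so that an allocation or fill failure aborts the request
 * before any state has been destroyed.
 */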
1908*4882a593Smuzhiyun 
1909*4882a593Smuzhiyun #ifdef CONFIG_NET_CLS
1910*4882a593Smuzhiyun 
1911*4882a593Smuzhiyun struct tcf_bind_args {
1912*4882a593Smuzhiyun 	struct tcf_walker w;
1913*4882a593Smuzhiyun 	unsigned long base;
1914*4882a593Smuzhiyun 	unsigned long cl;
1915*4882a593Smuzhiyun 	u32 classid;
1916*4882a593Smuzhiyun };
1917*4882a593Smuzhiyun 
1918*4882a593Smuzhiyun static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1919*4882a593Smuzhiyun {
1920*4882a593Smuzhiyun 	struct tcf_bind_args *a = (void *)arg;
1921*4882a593Smuzhiyun 
1922*4882a593Smuzhiyun 	if (tp->ops->bind_class) {
1923*4882a593Smuzhiyun 		struct Qdisc *q = tcf_block_q(tp->chain->block);
1924*4882a593Smuzhiyun 
1925*4882a593Smuzhiyun 		sch_tree_lock(q);
1926*4882a593Smuzhiyun 		tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
1927*4882a593Smuzhiyun 		sch_tree_unlock(q);
1928*4882a593Smuzhiyun 	}
1929*4882a593Smuzhiyun 	return 0;
1930*4882a593Smuzhiyun }
1931*4882a593Smuzhiyun 
1932*4882a593Smuzhiyun struct tc_bind_class_args {
1933*4882a593Smuzhiyun 	struct qdisc_walker w;
1934*4882a593Smuzhiyun 	unsigned long new_cl;
1935*4882a593Smuzhiyun 	u32 portid;
1936*4882a593Smuzhiyun 	u32 clid;
1937*4882a593Smuzhiyun };
1938*4882a593Smuzhiyun 
1939*4882a593Smuzhiyun static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
1940*4882a593Smuzhiyun 				struct qdisc_walker *w)
1941*4882a593Smuzhiyun {
1942*4882a593Smuzhiyun 	struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
1943*4882a593Smuzhiyun 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1944*4882a593Smuzhiyun 	struct tcf_block *block;
1945*4882a593Smuzhiyun 	struct tcf_chain *chain;
1946*4882a593Smuzhiyun 
1947*4882a593Smuzhiyun 	block = cops->tcf_block(q, cl, NULL);
1948*4882a593Smuzhiyun 	if (!block)
1949*4882a593Smuzhiyun 		return 0;
1950*4882a593Smuzhiyun 	for (chain = tcf_get_next_chain(block, NULL);
1951*4882a593Smuzhiyun 	     chain;
1952*4882a593Smuzhiyun 	     chain = tcf_get_next_chain(block, chain)) {
1953*4882a593Smuzhiyun 		struct tcf_proto *tp;
1954*4882a593Smuzhiyun 
1955*4882a593Smuzhiyun 		for (tp = tcf_get_next_proto(chain, NULL, true);
1956*4882a593Smuzhiyun 		     tp; tp = tcf_get_next_proto(chain, tp, true)) {
1957*4882a593Smuzhiyun 			struct tcf_bind_args arg = {};
1958*4882a593Smuzhiyun 
1959*4882a593Smuzhiyun 			arg.w.fn = tcf_node_bind;
1960*4882a593Smuzhiyun 			arg.classid = a->clid;
1961*4882a593Smuzhiyun 			arg.base = cl;
1962*4882a593Smuzhiyun 			arg.cl = a->new_cl;
1963*4882a593Smuzhiyun 			tp->ops->walk(tp, &arg.w, true);
1964*4882a593Smuzhiyun 		}
1965*4882a593Smuzhiyun 	}
1966*4882a593Smuzhiyun 
1967*4882a593Smuzhiyun 	return 0;
1968*4882a593Smuzhiyun }
1969*4882a593Smuzhiyun 
1970*4882a593Smuzhiyun static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1971*4882a593Smuzhiyun 			   unsigned long new_cl)
1972*4882a593Smuzhiyun {
1973*4882a593Smuzhiyun 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1974*4882a593Smuzhiyun 	struct tc_bind_class_args args = {};
1975*4882a593Smuzhiyun 
1976*4882a593Smuzhiyun 	if (!cops->tcf_block)
1977*4882a593Smuzhiyun 		return;
1978*4882a593Smuzhiyun 	args.portid = portid;
1979*4882a593Smuzhiyun 	args.clid = clid;
1980*4882a593Smuzhiyun 	args.new_cl = new_cl;
1981*4882a593Smuzhiyun 	args.w.fn = tc_bind_class_walker;
1982*4882a593Smuzhiyun 	q->ops->cl_ops->walk(q, &args.w);
1983*4882a593Smuzhiyun }
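/* Illustrative note: tc_bind_tclass() performs the "reverse binding"
 * referred to in tc_ctl_tclass() below.  It walks every filter attached
 * to the qdisc and lets filters whose classid matches rebind their cached
 * class reference: to the new class after creation, or to 0 after
 * RTM_DELTCLASS so stale pointers to the deleted class disappear.
 */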
1984*4882a593Smuzhiyun 
1985*4882a593Smuzhiyun #else
1986*4882a593Smuzhiyun 
1987*4882a593Smuzhiyun static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1988*4882a593Smuzhiyun 			   unsigned long new_cl)
1989*4882a593Smuzhiyun {
1990*4882a593Smuzhiyun }
1991*4882a593Smuzhiyun 
1992*4882a593Smuzhiyun #endif
1993*4882a593Smuzhiyun 
1994*4882a593Smuzhiyun static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1995*4882a593Smuzhiyun 			 struct netlink_ext_ack *extack)
1996*4882a593Smuzhiyun {
1997*4882a593Smuzhiyun 	struct net *net = sock_net(skb->sk);
1998*4882a593Smuzhiyun 	struct tcmsg *tcm = nlmsg_data(n);
1999*4882a593Smuzhiyun 	struct nlattr *tca[TCA_MAX + 1];
2000*4882a593Smuzhiyun 	struct net_device *dev;
2001*4882a593Smuzhiyun 	struct Qdisc *q = NULL;
2002*4882a593Smuzhiyun 	const struct Qdisc_class_ops *cops;
2003*4882a593Smuzhiyun 	unsigned long cl = 0;
2004*4882a593Smuzhiyun 	unsigned long new_cl;
2005*4882a593Smuzhiyun 	u32 portid;
2006*4882a593Smuzhiyun 	u32 clid;
2007*4882a593Smuzhiyun 	u32 qid;
2008*4882a593Smuzhiyun 	int err;
2009*4882a593Smuzhiyun 
2010*4882a593Smuzhiyun 	if ((n->nlmsg_type != RTM_GETTCLASS) &&
2011*4882a593Smuzhiyun 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
2012*4882a593Smuzhiyun 		return -EPERM;
2013*4882a593Smuzhiyun 
2014*4882a593Smuzhiyun 	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
2015*4882a593Smuzhiyun 				     rtm_tca_policy, extack);
2016*4882a593Smuzhiyun 	if (err < 0)
2017*4882a593Smuzhiyun 		return err;
2018*4882a593Smuzhiyun 
2019*4882a593Smuzhiyun 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2020*4882a593Smuzhiyun 	if (!dev)
2021*4882a593Smuzhiyun 		return -ENODEV;
2022*4882a593Smuzhiyun 
2023*4882a593Smuzhiyun 	/*
2024*4882a593Smuzhiyun 	   parent == TC_H_UNSPEC - unspecified parent.
2025*4882a593Smuzhiyun 	   parent == TC_H_ROOT   - class is root, which has no parent.
2026*4882a593Smuzhiyun 	   parent == X:0	 - parent is root class.
2027*4882a593Smuzhiyun 	   parent == X:Y	 - parent is a node in hierarchy.
2028*4882a593Smuzhiyun 	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
2029*4882a593Smuzhiyun 
2030*4882a593Smuzhiyun 	   handle == 0:0	 - generate handle from kernel pool.
2031*4882a593Smuzhiyun 	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
2032*4882a593Smuzhiyun 	   handle == X:Y	 - clear (fully specified).
2033*4882a593Smuzhiyun 	   handle == X:0	 - root class.
2034*4882a593Smuzhiyun 	 */
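	/* Illustrative example, assuming stock iproute2: a request like
	 * "tc class add dev eth0 parent 1: classid 1:10 ..." arrives with
	 * tcm_parent == 1:0 and tcm_handle == 1:10, so qid resolves to the
	 * 1:0 qdisc below and clid to the fully qualified 1:10.
	 */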
2035*4882a593Smuzhiyun 
2036*4882a593Smuzhiyun 	/* Step 1. Determine qdisc handle X:0 */
2037*4882a593Smuzhiyun 
2038*4882a593Smuzhiyun 	portid = tcm->tcm_parent;
2039*4882a593Smuzhiyun 	clid = tcm->tcm_handle;
2040*4882a593Smuzhiyun 	qid = TC_H_MAJ(clid);
2041*4882a593Smuzhiyun 
2042*4882a593Smuzhiyun 	if (portid != TC_H_ROOT) {
2043*4882a593Smuzhiyun 		u32 qid1 = TC_H_MAJ(portid);
2044*4882a593Smuzhiyun 
2045*4882a593Smuzhiyun 		if (qid && qid1) {
2046*4882a593Smuzhiyun 			/* If both majors are known, they must be identical. */
2047*4882a593Smuzhiyun 			if (qid != qid1)
2048*4882a593Smuzhiyun 				return -EINVAL;
2049*4882a593Smuzhiyun 		} else if (qid1) {
2050*4882a593Smuzhiyun 			qid = qid1;
2051*4882a593Smuzhiyun 		} else if (qid == 0)
2052*4882a593Smuzhiyun 			qid = rtnl_dereference(dev->qdisc)->handle;
2053*4882a593Smuzhiyun 
2054*4882a593Smuzhiyun 		/* Now qid is a genuine qdisc handle consistent
2055*4882a593Smuzhiyun 		 * with both parent and child.
2056*4882a593Smuzhiyun 		 *
2057*4882a593Smuzhiyun 		 * TC_H_MAJ(portid) may still be unspecified; complete it now.
2058*4882a593Smuzhiyun 		 */
2059*4882a593Smuzhiyun 		if (portid)
2060*4882a593Smuzhiyun 			portid = TC_H_MAKE(qid, portid);
2061*4882a593Smuzhiyun 	} else {
2062*4882a593Smuzhiyun 		if (qid == 0)
2063*4882a593Smuzhiyun 			qid = rtnl_dereference(dev->qdisc)->handle;
2064*4882a593Smuzhiyun 	}
2065*4882a593Smuzhiyun 
2066*4882a593Smuzhiyun 	/* OK. Locate qdisc */
2067*4882a593Smuzhiyun 	q = qdisc_lookup(dev, qid);
2068*4882a593Smuzhiyun 	if (!q)
2069*4882a593Smuzhiyun 		return -ENOENT;
2070*4882a593Smuzhiyun 
2071*4882a593Smuzhiyun 	/* And check that it supports classes */
2072*4882a593Smuzhiyun 	cops = q->ops->cl_ops;
2073*4882a593Smuzhiyun 	if (cops == NULL)
2074*4882a593Smuzhiyun 		return -EINVAL;
2075*4882a593Smuzhiyun 
2076*4882a593Smuzhiyun 	/* Now try to get class */
2077*4882a593Smuzhiyun 	if (clid == 0) {
2078*4882a593Smuzhiyun 		if (portid == TC_H_ROOT)
2079*4882a593Smuzhiyun 			clid = qid;
2080*4882a593Smuzhiyun 	} else
2081*4882a593Smuzhiyun 		clid = TC_H_MAKE(qid, clid);
2082*4882a593Smuzhiyun 
2083*4882a593Smuzhiyun 	if (clid)
2084*4882a593Smuzhiyun 		cl = cops->find(q, clid);
2085*4882a593Smuzhiyun 
2086*4882a593Smuzhiyun 	if (cl == 0) {
2087*4882a593Smuzhiyun 		err = -ENOENT;
2088*4882a593Smuzhiyun 		if (n->nlmsg_type != RTM_NEWTCLASS ||
2089*4882a593Smuzhiyun 		    !(n->nlmsg_flags & NLM_F_CREATE))
2090*4882a593Smuzhiyun 			goto out;
2091*4882a593Smuzhiyun 	} else {
2092*4882a593Smuzhiyun 		switch (n->nlmsg_type) {
2093*4882a593Smuzhiyun 		case RTM_NEWTCLASS:
2094*4882a593Smuzhiyun 			err = -EEXIST;
2095*4882a593Smuzhiyun 			if (n->nlmsg_flags & NLM_F_EXCL)
2096*4882a593Smuzhiyun 				goto out;
2097*4882a593Smuzhiyun 			break;
2098*4882a593Smuzhiyun 		case RTM_DELTCLASS:
2099*4882a593Smuzhiyun 			err = tclass_del_notify(net, cops, skb, n, q, cl);
2100*4882a593Smuzhiyun 			/* Unbind the class from its filters by rebinding them to 0 */
2101*4882a593Smuzhiyun 			tc_bind_tclass(q, portid, clid, 0);
2102*4882a593Smuzhiyun 			goto out;
2103*4882a593Smuzhiyun 		case RTM_GETTCLASS:
2104*4882a593Smuzhiyun 			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
2105*4882a593Smuzhiyun 			goto out;
2106*4882a593Smuzhiyun 		default:
2107*4882a593Smuzhiyun 			err = -EINVAL;
2108*4882a593Smuzhiyun 			goto out;
2109*4882a593Smuzhiyun 		}
2110*4882a593Smuzhiyun 	}
2111*4882a593Smuzhiyun 
2112*4882a593Smuzhiyun 	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2113*4882a593Smuzhiyun 		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2114*4882a593Smuzhiyun 		return -EOPNOTSUPP;
2115*4882a593Smuzhiyun 	}
2116*4882a593Smuzhiyun 
2117*4882a593Smuzhiyun 	new_cl = cl;
2118*4882a593Smuzhiyun 	err = -EOPNOTSUPP;
2119*4882a593Smuzhiyun 	if (cops->change)
2120*4882a593Smuzhiyun 		err = cops->change(q, clid, portid, tca, &new_cl, extack);
2121*4882a593Smuzhiyun 	if (err == 0) {
2122*4882a593Smuzhiyun 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
2123*4882a593Smuzhiyun 		/* We just created a new class; need to do the reverse binding. */
2124*4882a593Smuzhiyun 		if (cl != new_cl)
2125*4882a593Smuzhiyun 			tc_bind_tclass(q, portid, clid, new_cl);
2126*4882a593Smuzhiyun 	}
2127*4882a593Smuzhiyun out:
2128*4882a593Smuzhiyun 	return err;
2129*4882a593Smuzhiyun }
2130*4882a593Smuzhiyun 
2131*4882a593Smuzhiyun struct qdisc_dump_args {
2132*4882a593Smuzhiyun 	struct qdisc_walker	w;
2133*4882a593Smuzhiyun 	struct sk_buff		*skb;
2134*4882a593Smuzhiyun 	struct netlink_callback	*cb;
2135*4882a593Smuzhiyun };
2136*4882a593Smuzhiyun 
2137*4882a593Smuzhiyun static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2138*4882a593Smuzhiyun 			    struct qdisc_walker *arg)
2139*4882a593Smuzhiyun {
2140*4882a593Smuzhiyun 	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2141*4882a593Smuzhiyun 
2142*4882a593Smuzhiyun 	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2143*4882a593Smuzhiyun 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2144*4882a593Smuzhiyun 			      RTM_NEWTCLASS);
2145*4882a593Smuzhiyun }
2146*4882a593Smuzhiyun 
2147*4882a593Smuzhiyun static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2148*4882a593Smuzhiyun 				struct tcmsg *tcm, struct netlink_callback *cb,
2149*4882a593Smuzhiyun 				int *t_p, int s_t)
2150*4882a593Smuzhiyun {
2151*4882a593Smuzhiyun 	struct qdisc_dump_args arg;
2152*4882a593Smuzhiyun 
2153*4882a593Smuzhiyun 	if (tc_qdisc_dump_ignore(q, false) ||
2154*4882a593Smuzhiyun 	    *t_p < s_t || !q->ops->cl_ops ||
2155*4882a593Smuzhiyun 	    (tcm->tcm_parent &&
2156*4882a593Smuzhiyun 	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2157*4882a593Smuzhiyun 		(*t_p)++;
2158*4882a593Smuzhiyun 		return 0;
2159*4882a593Smuzhiyun 	}
2160*4882a593Smuzhiyun 	if (*t_p > s_t)
2161*4882a593Smuzhiyun 		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2162*4882a593Smuzhiyun 	arg.w.fn = qdisc_class_dump;
2163*4882a593Smuzhiyun 	arg.skb = skb;
2164*4882a593Smuzhiyun 	arg.cb = cb;
2165*4882a593Smuzhiyun 	arg.w.stop  = 0;
2166*4882a593Smuzhiyun 	arg.w.skip = cb->args[1];
2167*4882a593Smuzhiyun 	arg.w.count = 0;
2168*4882a593Smuzhiyun 	q->ops->cl_ops->walk(q, &arg.w);
2169*4882a593Smuzhiyun 	cb->args[1] = arg.w.count;
2170*4882a593Smuzhiyun 	if (arg.w.stop)
2171*4882a593Smuzhiyun 		return -1;
2172*4882a593Smuzhiyun 	(*t_p)++;
2173*4882a593Smuzhiyun 	return 0;
2174*4882a593Smuzhiyun }
2175*4882a593Smuzhiyun 
2176*4882a593Smuzhiyun static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2177*4882a593Smuzhiyun 			       struct tcmsg *tcm, struct netlink_callback *cb,
2178*4882a593Smuzhiyun 			       int *t_p, int s_t, bool recur)
2179*4882a593Smuzhiyun {
2180*4882a593Smuzhiyun 	struct Qdisc *q;
2181*4882a593Smuzhiyun 	int b;
2182*4882a593Smuzhiyun 
2183*4882a593Smuzhiyun 	if (!root)
2184*4882a593Smuzhiyun 		return 0;
2185*4882a593Smuzhiyun 
2186*4882a593Smuzhiyun 	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2187*4882a593Smuzhiyun 		return -1;
2188*4882a593Smuzhiyun 
2189*4882a593Smuzhiyun 	if (!qdisc_dev(root) || !recur)
2190*4882a593Smuzhiyun 		return 0;
2191*4882a593Smuzhiyun 
2192*4882a593Smuzhiyun 	if (tcm->tcm_parent) {
2193*4882a593Smuzhiyun 		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
2194*4882a593Smuzhiyun 		if (q && q != root &&
2195*4882a593Smuzhiyun 		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2196*4882a593Smuzhiyun 			return -1;
2197*4882a593Smuzhiyun 		return 0;
2198*4882a593Smuzhiyun 	}
2199*4882a593Smuzhiyun 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
2200*4882a593Smuzhiyun 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2201*4882a593Smuzhiyun 			return -1;
2202*4882a593Smuzhiyun 	}
2203*4882a593Smuzhiyun 
2204*4882a593Smuzhiyun 	return 0;
2205*4882a593Smuzhiyun }
2206*4882a593Smuzhiyun 
2207*4882a593Smuzhiyun static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2208*4882a593Smuzhiyun {
2209*4882a593Smuzhiyun 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
2210*4882a593Smuzhiyun 	struct net *net = sock_net(skb->sk);
2211*4882a593Smuzhiyun 	struct netdev_queue *dev_queue;
2212*4882a593Smuzhiyun 	struct net_device *dev;
2213*4882a593Smuzhiyun 	int t, s_t;
2214*4882a593Smuzhiyun 
2215*4882a593Smuzhiyun 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2216*4882a593Smuzhiyun 		return 0;
2217*4882a593Smuzhiyun 	dev = dev_get_by_index(net, tcm->tcm_ifindex);
2218*4882a593Smuzhiyun 	if (!dev)
2219*4882a593Smuzhiyun 		return 0;
2220*4882a593Smuzhiyun 
2221*4882a593Smuzhiyun 	s_t = cb->args[0];
2222*4882a593Smuzhiyun 	t = 0;
2223*4882a593Smuzhiyun 
2224*4882a593Smuzhiyun 	if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
2225*4882a593Smuzhiyun 				skb, tcm, cb, &t, s_t, true) < 0)
2226*4882a593Smuzhiyun 		goto done;
2227*4882a593Smuzhiyun 
2228*4882a593Smuzhiyun 	dev_queue = dev_ingress_queue(dev);
2229*4882a593Smuzhiyun 	if (dev_queue &&
2230*4882a593Smuzhiyun 	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
2231*4882a593Smuzhiyun 				&t, s_t, false) < 0)
2232*4882a593Smuzhiyun 		goto done;
2233*4882a593Smuzhiyun 
2234*4882a593Smuzhiyun done:
2235*4882a593Smuzhiyun 	cb->args[0] = t;
2236*4882a593Smuzhiyun 
2237*4882a593Smuzhiyun 	dev_put(dev);
2238*4882a593Smuzhiyun 	return skb->len;
2239*4882a593Smuzhiyun }
2240*4882a593Smuzhiyun 
2241*4882a593Smuzhiyun #ifdef CONFIG_PROC_FS
2242*4882a593Smuzhiyun static int psched_show(struct seq_file *seq, void *v)
2243*4882a593Smuzhiyun {
2244*4882a593Smuzhiyun 	seq_printf(seq, "%08x %08x %08x %08x\n",
2245*4882a593Smuzhiyun 		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2246*4882a593Smuzhiyun 		   1000000,
2247*4882a593Smuzhiyun 		   (u32)NSEC_PER_SEC / hrtimer_resolution);
2248*4882a593Smuzhiyun 
2249*4882a593Smuzhiyun 	return 0;
2250*4882a593Smuzhiyun }
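/* Illustrative note: these four hex fields are what userspace reads from
 * /proc/net/psched to calibrate its tick<->time conversion.  On a
 * high-resolution-timer kernel a typical line would be
 *
 *	000003e8 00000040 000f4240 3b9aca00
 *
 * i.e. 1000 ns per usec, PSCHED_TICKS2NS(1) == 64, the legacy 1 MHz
 * pseudo-clock, and NSEC_PER_SEC / hrtimer_resolution.
 */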
2251*4882a593Smuzhiyun 
2252*4882a593Smuzhiyun static int __net_init psched_net_init(struct net *net)
2253*4882a593Smuzhiyun {
2254*4882a593Smuzhiyun 	struct proc_dir_entry *e;
2255*4882a593Smuzhiyun 
2256*4882a593Smuzhiyun 	e = proc_create_single("psched", 0, net->proc_net, psched_show);
2257*4882a593Smuzhiyun 	if (e == NULL)
2258*4882a593Smuzhiyun 		return -ENOMEM;
2259*4882a593Smuzhiyun 
2260*4882a593Smuzhiyun 	return 0;
2261*4882a593Smuzhiyun }
2262*4882a593Smuzhiyun 
2263*4882a593Smuzhiyun static void __net_exit psched_net_exit(struct net *net)
2264*4882a593Smuzhiyun {
2265*4882a593Smuzhiyun 	remove_proc_entry("psched", net->proc_net);
2266*4882a593Smuzhiyun }
2267*4882a593Smuzhiyun #else
2268*4882a593Smuzhiyun static int __net_init psched_net_init(struct net *net)
2269*4882a593Smuzhiyun {
2270*4882a593Smuzhiyun 	return 0;
2271*4882a593Smuzhiyun }
2272*4882a593Smuzhiyun 
2273*4882a593Smuzhiyun static void __net_exit psched_net_exit(struct net *net)
2274*4882a593Smuzhiyun {
2275*4882a593Smuzhiyun }
2276*4882a593Smuzhiyun #endif
2277*4882a593Smuzhiyun 
2278*4882a593Smuzhiyun static struct pernet_operations psched_net_ops = {
2279*4882a593Smuzhiyun 	.init = psched_net_init,
2280*4882a593Smuzhiyun 	.exit = psched_net_exit,
2281*4882a593Smuzhiyun };
2282*4882a593Smuzhiyun 
2283*4882a593Smuzhiyun static int __init pktsched_init(void)
2284*4882a593Smuzhiyun {
2285*4882a593Smuzhiyun 	int err;
2286*4882a593Smuzhiyun 
2287*4882a593Smuzhiyun 	err = register_pernet_subsys(&psched_net_ops);
2288*4882a593Smuzhiyun 	if (err) {
2289*4882a593Smuzhiyun 		pr_err("pktsched_init: "
2290*4882a593Smuzhiyun 		       "cannot initialize per netns operations\n");
2291*4882a593Smuzhiyun 		return err;
2292*4882a593Smuzhiyun 	}
2293*4882a593Smuzhiyun 
2294*4882a593Smuzhiyun 	register_qdisc(&pfifo_fast_ops);
2295*4882a593Smuzhiyun 	register_qdisc(&pfifo_qdisc_ops);
2296*4882a593Smuzhiyun 	register_qdisc(&bfifo_qdisc_ops);
2297*4882a593Smuzhiyun 	register_qdisc(&pfifo_head_drop_qdisc_ops);
2298*4882a593Smuzhiyun 	register_qdisc(&mq_qdisc_ops);
2299*4882a593Smuzhiyun 	register_qdisc(&noqueue_qdisc_ops);
2300*4882a593Smuzhiyun 
2301*4882a593Smuzhiyun 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2302*4882a593Smuzhiyun 	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2303*4882a593Smuzhiyun 	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2304*4882a593Smuzhiyun 		      0);
2305*4882a593Smuzhiyun 	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2306*4882a593Smuzhiyun 	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2307*4882a593Smuzhiyun 	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2308*4882a593Smuzhiyun 		      0);
2309*4882a593Smuzhiyun 
2310*4882a593Smuzhiyun 	return 0;
2311*4882a593Smuzhiyun }
2312*4882a593Smuzhiyun 
2313*4882a593Smuzhiyun subsys_initcall(pktsched_init);
2314