xref: /OK3568_Linux_fs/kernel/net/sched/sch_generic.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * net/sched/sch_generic.c	Generic packet scheduler routines.
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6*4882a593Smuzhiyun  *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
7*4882a593Smuzhiyun  *              - Ingress support
8*4882a593Smuzhiyun  */
9*4882a593Smuzhiyun 
10*4882a593Smuzhiyun #include <linux/bitops.h>
11*4882a593Smuzhiyun #include <linux/module.h>
12*4882a593Smuzhiyun #include <linux/types.h>
13*4882a593Smuzhiyun #include <linux/kernel.h>
14*4882a593Smuzhiyun #include <linux/sched.h>
15*4882a593Smuzhiyun #include <linux/string.h>
16*4882a593Smuzhiyun #include <linux/errno.h>
17*4882a593Smuzhiyun #include <linux/netdevice.h>
18*4882a593Smuzhiyun #include <linux/skbuff.h>
19*4882a593Smuzhiyun #include <linux/rtnetlink.h>
20*4882a593Smuzhiyun #include <linux/init.h>
21*4882a593Smuzhiyun #include <linux/rcupdate.h>
22*4882a593Smuzhiyun #include <linux/list.h>
23*4882a593Smuzhiyun #include <linux/slab.h>
24*4882a593Smuzhiyun #include <linux/if_vlan.h>
25*4882a593Smuzhiyun #include <linux/skb_array.h>
26*4882a593Smuzhiyun #include <linux/if_macvlan.h>
27*4882a593Smuzhiyun #include <net/sch_generic.h>
28*4882a593Smuzhiyun #include <net/pkt_sched.h>
29*4882a593Smuzhiyun #include <net/dst.h>
30*4882a593Smuzhiyun #include <trace/events/qdisc.h>
31*4882a593Smuzhiyun #include <trace/events/net.h>
32*4882a593Smuzhiyun #include <net/xfrm.h>
33*4882a593Smuzhiyun 
34*4882a593Smuzhiyun /* Qdisc to use by default */
35*4882a593Smuzhiyun const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
36*4882a593Smuzhiyun EXPORT_SYMBOL(default_qdisc_ops);
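/* Illustration (assumes the usual net.core.default_qdisc sysctl is wired
 * up in this build): the ops selected here are what freshly attached tx
 * queues fall back to, and can typically be changed at runtime with e.g.
 *
 *	sysctl -w net.core.default_qdisc=fq_codel
 */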
37*4882a593Smuzhiyun 
38*4882a593Smuzhiyun static void qdisc_maybe_clear_missed(struct Qdisc *q,
39*4882a593Smuzhiyun 				     const struct netdev_queue *txq)
40*4882a593Smuzhiyun {
41*4882a593Smuzhiyun 	clear_bit(__QDISC_STATE_MISSED, &q->state);
42*4882a593Smuzhiyun 
43*4882a593Smuzhiyun 	/* Make sure the below netif_xmit_frozen_or_stopped()
44*4882a593Smuzhiyun 	 * checking happens after clearing STATE_MISSED.
45*4882a593Smuzhiyun 	 */
46*4882a593Smuzhiyun 	smp_mb__after_atomic();
47*4882a593Smuzhiyun 
48*4882a593Smuzhiyun 	/* Check netif_xmit_frozen_or_stopped() again to make sure
49*4882a593Smuzhiyun 	 * STATE_MISSED is set again if it was set by
50*4882a593Smuzhiyun 	 * netif_tx_wake_queue()'s rescheduling of net_tx_action()
51*4882a593Smuzhiyun 	 * and then cleared by the above clear_bit().
52*4882a593Smuzhiyun 	 */
53*4882a593Smuzhiyun 	if (!netif_xmit_frozen_or_stopped(txq))
54*4882a593Smuzhiyun 		set_bit(__QDISC_STATE_MISSED, &q->state);
55*4882a593Smuzhiyun }
56*4882a593Smuzhiyun 
57*4882a593Smuzhiyun /* Main transmission queue. */
58*4882a593Smuzhiyun 
59*4882a593Smuzhiyun /* Modifications to data participating in scheduling must be protected with
60*4882a593Smuzhiyun  * qdisc_lock(qdisc) spinlock.
61*4882a593Smuzhiyun  *
62*4882a593Smuzhiyun  * The idea is the following:
63*4882a593Smuzhiyun  * - enqueue, dequeue are serialized via qdisc root lock
64*4882a593Smuzhiyun  * - ingress filtering is also serialized via qdisc root lock
65*4882a593Smuzhiyun  * - updates to tree and tree walking are only done under the rtnl mutex.
66*4882a593Smuzhiyun  */
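/* A sketch of the exception, based on the code below: qdiscs flagged
 * TCQ_F_NOLOCK (e.g. pfifo_fast) do not rely on the root lock for
 * enqueue/dequeue; they use lockless skb_array rings plus q->seqlock,
 * and only take qdisc_lock(q) around the gso_skb/skb_bad_txq side queues.
 */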
67*4882a593Smuzhiyun 
68*4882a593Smuzhiyun #define SKB_XOFF_MAGIC ((struct sk_buff *)1UL)
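/* SKB_XOFF_MAGIC is a sentinel, not a real skb: __skb_dequeue_bad_txq()
 * returns it when the target tx queue is frozen/stopped, so dequeue_skb()
 * can tell "queue empty" (NULL) apart from "queue throttled" (this value).
 */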
69*4882a593Smuzhiyun 
70*4882a593Smuzhiyun static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
71*4882a593Smuzhiyun {
72*4882a593Smuzhiyun 	const struct netdev_queue *txq = q->dev_queue;
73*4882a593Smuzhiyun 	spinlock_t *lock = NULL;
74*4882a593Smuzhiyun 	struct sk_buff *skb;
75*4882a593Smuzhiyun 
76*4882a593Smuzhiyun 	if (q->flags & TCQ_F_NOLOCK) {
77*4882a593Smuzhiyun 		lock = qdisc_lock(q);
78*4882a593Smuzhiyun 		spin_lock(lock);
79*4882a593Smuzhiyun 	}
80*4882a593Smuzhiyun 
81*4882a593Smuzhiyun 	skb = skb_peek(&q->skb_bad_txq);
82*4882a593Smuzhiyun 	if (skb) {
83*4882a593Smuzhiyun 		/* check the reason of requeuing without tx lock first */
84*4882a593Smuzhiyun 		txq = skb_get_tx_queue(txq->dev, skb);
85*4882a593Smuzhiyun 		if (!netif_xmit_frozen_or_stopped(txq)) {
86*4882a593Smuzhiyun 			skb = __skb_dequeue(&q->skb_bad_txq);
87*4882a593Smuzhiyun 			if (qdisc_is_percpu_stats(q)) {
88*4882a593Smuzhiyun 				qdisc_qstats_cpu_backlog_dec(q, skb);
89*4882a593Smuzhiyun 				qdisc_qstats_cpu_qlen_dec(q);
90*4882a593Smuzhiyun 			} else {
91*4882a593Smuzhiyun 				qdisc_qstats_backlog_dec(q, skb);
92*4882a593Smuzhiyun 				q->q.qlen--;
93*4882a593Smuzhiyun 			}
94*4882a593Smuzhiyun 		} else {
95*4882a593Smuzhiyun 			skb = SKB_XOFF_MAGIC;
96*4882a593Smuzhiyun 			qdisc_maybe_clear_missed(q, txq);
97*4882a593Smuzhiyun 		}
98*4882a593Smuzhiyun 	}
99*4882a593Smuzhiyun 
100*4882a593Smuzhiyun 	if (lock)
101*4882a593Smuzhiyun 		spin_unlock(lock);
102*4882a593Smuzhiyun 
103*4882a593Smuzhiyun 	return skb;
104*4882a593Smuzhiyun }
105*4882a593Smuzhiyun 
106*4882a593Smuzhiyun static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
107*4882a593Smuzhiyun {
108*4882a593Smuzhiyun 	struct sk_buff *skb = skb_peek(&q->skb_bad_txq);
109*4882a593Smuzhiyun 
110*4882a593Smuzhiyun 	if (unlikely(skb))
111*4882a593Smuzhiyun 		skb = __skb_dequeue_bad_txq(q);
112*4882a593Smuzhiyun 
113*4882a593Smuzhiyun 	return skb;
114*4882a593Smuzhiyun }
115*4882a593Smuzhiyun 
116*4882a593Smuzhiyun static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
117*4882a593Smuzhiyun 					     struct sk_buff *skb)
118*4882a593Smuzhiyun {
119*4882a593Smuzhiyun 	spinlock_t *lock = NULL;
120*4882a593Smuzhiyun 
121*4882a593Smuzhiyun 	if (q->flags & TCQ_F_NOLOCK) {
122*4882a593Smuzhiyun 		lock = qdisc_lock(q);
123*4882a593Smuzhiyun 		spin_lock(lock);
124*4882a593Smuzhiyun 	}
125*4882a593Smuzhiyun 
126*4882a593Smuzhiyun 	__skb_queue_tail(&q->skb_bad_txq, skb);
127*4882a593Smuzhiyun 
128*4882a593Smuzhiyun 	if (qdisc_is_percpu_stats(q)) {
129*4882a593Smuzhiyun 		qdisc_qstats_cpu_backlog_inc(q, skb);
130*4882a593Smuzhiyun 		qdisc_qstats_cpu_qlen_inc(q);
131*4882a593Smuzhiyun 	} else {
132*4882a593Smuzhiyun 		qdisc_qstats_backlog_inc(q, skb);
133*4882a593Smuzhiyun 		q->q.qlen++;
134*4882a593Smuzhiyun 	}
135*4882a593Smuzhiyun 
136*4882a593Smuzhiyun 	if (lock)
137*4882a593Smuzhiyun 		spin_unlock(lock);
138*4882a593Smuzhiyun }
139*4882a593Smuzhiyun 
140*4882a593Smuzhiyun static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
141*4882a593Smuzhiyun {
142*4882a593Smuzhiyun 	spinlock_t *lock = NULL;
143*4882a593Smuzhiyun 
144*4882a593Smuzhiyun 	if (q->flags & TCQ_F_NOLOCK) {
145*4882a593Smuzhiyun 		lock = qdisc_lock(q);
146*4882a593Smuzhiyun 		spin_lock(lock);
147*4882a593Smuzhiyun 	}
148*4882a593Smuzhiyun 
149*4882a593Smuzhiyun 	while (skb) {
150*4882a593Smuzhiyun 		struct sk_buff *next = skb->next;
151*4882a593Smuzhiyun 
152*4882a593Smuzhiyun 		__skb_queue_tail(&q->gso_skb, skb);
153*4882a593Smuzhiyun 
154*4882a593Smuzhiyun 		/* it's still part of the queue */
155*4882a593Smuzhiyun 		if (qdisc_is_percpu_stats(q)) {
156*4882a593Smuzhiyun 			qdisc_qstats_cpu_requeues_inc(q);
157*4882a593Smuzhiyun 			qdisc_qstats_cpu_backlog_inc(q, skb);
158*4882a593Smuzhiyun 			qdisc_qstats_cpu_qlen_inc(q);
159*4882a593Smuzhiyun 		} else {
160*4882a593Smuzhiyun 			q->qstats.requeues++;
161*4882a593Smuzhiyun 			qdisc_qstats_backlog_inc(q, skb);
162*4882a593Smuzhiyun 			q->q.qlen++;
163*4882a593Smuzhiyun 		}
164*4882a593Smuzhiyun 
165*4882a593Smuzhiyun 		skb = next;
166*4882a593Smuzhiyun 	}
167*4882a593Smuzhiyun 	if (lock)
168*4882a593Smuzhiyun 		spin_unlock(lock);
169*4882a593Smuzhiyun 	__netif_schedule(q);
170*4882a593Smuzhiyun }
171*4882a593Smuzhiyun 
172*4882a593Smuzhiyun static void try_bulk_dequeue_skb(struct Qdisc *q,
173*4882a593Smuzhiyun 				 struct sk_buff *skb,
174*4882a593Smuzhiyun 				 const struct netdev_queue *txq,
175*4882a593Smuzhiyun 				 int *packets)
176*4882a593Smuzhiyun {
177*4882a593Smuzhiyun 	int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;
178*4882a593Smuzhiyun 
179*4882a593Smuzhiyun 	while (bytelimit > 0) {
180*4882a593Smuzhiyun 		struct sk_buff *nskb = q->dequeue(q);
181*4882a593Smuzhiyun 
182*4882a593Smuzhiyun 		if (!nskb)
183*4882a593Smuzhiyun 			break;
184*4882a593Smuzhiyun 
185*4882a593Smuzhiyun 		bytelimit -= nskb->len; /* covers GSO len */
186*4882a593Smuzhiyun 		skb->next = nskb;
187*4882a593Smuzhiyun 		skb = nskb;
188*4882a593Smuzhiyun 		(*packets)++; /* GSO counts as one pkt */
189*4882a593Smuzhiyun 	}
190*4882a593Smuzhiyun 	skb_mark_not_on_list(skb);
191*4882a593Smuzhiyun }
192*4882a593Smuzhiyun 
193*4882a593Smuzhiyun /* This variant of try_bulk_dequeue_skb() makes sure
194*4882a593Smuzhiyun  * all skbs in the chain are for the same txq
195*4882a593Smuzhiyun  */
196*4882a593Smuzhiyun static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
197*4882a593Smuzhiyun 				      struct sk_buff *skb,
198*4882a593Smuzhiyun 				      int *packets)
199*4882a593Smuzhiyun {
200*4882a593Smuzhiyun 	int mapping = skb_get_queue_mapping(skb);
201*4882a593Smuzhiyun 	struct sk_buff *nskb;
202*4882a593Smuzhiyun 	int cnt = 0;
203*4882a593Smuzhiyun 
204*4882a593Smuzhiyun 	do {
205*4882a593Smuzhiyun 		nskb = q->dequeue(q);
206*4882a593Smuzhiyun 		if (!nskb)
207*4882a593Smuzhiyun 			break;
208*4882a593Smuzhiyun 		if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
209*4882a593Smuzhiyun 			qdisc_enqueue_skb_bad_txq(q, nskb);
210*4882a593Smuzhiyun 			break;
211*4882a593Smuzhiyun 		}
212*4882a593Smuzhiyun 		skb->next = nskb;
213*4882a593Smuzhiyun 		skb = nskb;
214*4882a593Smuzhiyun 	} while (++cnt < 8);
215*4882a593Smuzhiyun 	(*packets) += cnt;
216*4882a593Smuzhiyun 	skb_mark_not_on_list(skb);
217*4882a593Smuzhiyun }
218*4882a593Smuzhiyun 
219*4882a593Smuzhiyun /* Note that dequeue_skb can possibly return a SKB list (via skb->next).
220*4882a593Smuzhiyun  * A requeued skb (via q->gso_skb) can also be a SKB list.
221*4882a593Smuzhiyun  */
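/* Bulking note (a sketch derived from the helpers above): the amount
 * pulled in one pass is bounded by qdisc_avail_bulklimit(txq) - i.e. by
 * BQL when it is enabled, which is an assumption about that helper - and
 * *packets counts a GSO skb as a single packet for the caller's quota.
 */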
222*4882a593Smuzhiyun static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
223*4882a593Smuzhiyun 				   int *packets)
224*4882a593Smuzhiyun {
225*4882a593Smuzhiyun 	const struct netdev_queue *txq = q->dev_queue;
226*4882a593Smuzhiyun 	struct sk_buff *skb = NULL;
227*4882a593Smuzhiyun 
228*4882a593Smuzhiyun 	*packets = 1;
229*4882a593Smuzhiyun 	if (unlikely(!skb_queue_empty(&q->gso_skb))) {
230*4882a593Smuzhiyun 		spinlock_t *lock = NULL;
231*4882a593Smuzhiyun 
232*4882a593Smuzhiyun 		if (q->flags & TCQ_F_NOLOCK) {
233*4882a593Smuzhiyun 			lock = qdisc_lock(q);
234*4882a593Smuzhiyun 			spin_lock(lock);
235*4882a593Smuzhiyun 		}
236*4882a593Smuzhiyun 
237*4882a593Smuzhiyun 		skb = skb_peek(&q->gso_skb);
238*4882a593Smuzhiyun 
239*4882a593Smuzhiyun 		/* skb may be null if another cpu pulls gso_skb off in between
240*4882a593Smuzhiyun 		 * empty check and lock.
241*4882a593Smuzhiyun 		 */
242*4882a593Smuzhiyun 		if (!skb) {
243*4882a593Smuzhiyun 			if (lock)
244*4882a593Smuzhiyun 				spin_unlock(lock);
245*4882a593Smuzhiyun 			goto validate;
246*4882a593Smuzhiyun 		}
247*4882a593Smuzhiyun 
248*4882a593Smuzhiyun 		/* skbs in gso_skb were already validated */
249*4882a593Smuzhiyun 		*validate = false;
250*4882a593Smuzhiyun 		if (xfrm_offload(skb))
251*4882a593Smuzhiyun 			*validate = true;
252*4882a593Smuzhiyun 		/* check the reason of requeuing without tx lock first */
253*4882a593Smuzhiyun 		txq = skb_get_tx_queue(txq->dev, skb);
254*4882a593Smuzhiyun 		if (!netif_xmit_frozen_or_stopped(txq)) {
255*4882a593Smuzhiyun 			skb = __skb_dequeue(&q->gso_skb);
256*4882a593Smuzhiyun 			if (qdisc_is_percpu_stats(q)) {
257*4882a593Smuzhiyun 				qdisc_qstats_cpu_backlog_dec(q, skb);
258*4882a593Smuzhiyun 				qdisc_qstats_cpu_qlen_dec(q);
259*4882a593Smuzhiyun 			} else {
260*4882a593Smuzhiyun 				qdisc_qstats_backlog_dec(q, skb);
261*4882a593Smuzhiyun 				q->q.qlen--;
262*4882a593Smuzhiyun 			}
263*4882a593Smuzhiyun 		} else {
264*4882a593Smuzhiyun 			skb = NULL;
265*4882a593Smuzhiyun 			qdisc_maybe_clear_missed(q, txq);
266*4882a593Smuzhiyun 		}
267*4882a593Smuzhiyun 		if (lock)
268*4882a593Smuzhiyun 			spin_unlock(lock);
269*4882a593Smuzhiyun 		goto trace;
270*4882a593Smuzhiyun 	}
271*4882a593Smuzhiyun validate:
272*4882a593Smuzhiyun 	*validate = true;
273*4882a593Smuzhiyun 
274*4882a593Smuzhiyun 	if ((q->flags & TCQ_F_ONETXQUEUE) &&
275*4882a593Smuzhiyun 	    netif_xmit_frozen_or_stopped(txq)) {
276*4882a593Smuzhiyun 		qdisc_maybe_clear_missed(q, txq);
277*4882a593Smuzhiyun 		return skb;
278*4882a593Smuzhiyun 	}
279*4882a593Smuzhiyun 
280*4882a593Smuzhiyun 	skb = qdisc_dequeue_skb_bad_txq(q);
281*4882a593Smuzhiyun 	if (unlikely(skb)) {
282*4882a593Smuzhiyun 		if (skb == SKB_XOFF_MAGIC)
283*4882a593Smuzhiyun 			return NULL;
284*4882a593Smuzhiyun 		goto bulk;
285*4882a593Smuzhiyun 	}
286*4882a593Smuzhiyun 	skb = q->dequeue(q);
287*4882a593Smuzhiyun 	if (skb) {
288*4882a593Smuzhiyun bulk:
289*4882a593Smuzhiyun 		if (qdisc_may_bulk(q))
290*4882a593Smuzhiyun 			try_bulk_dequeue_skb(q, skb, txq, packets);
291*4882a593Smuzhiyun 		else
292*4882a593Smuzhiyun 			try_bulk_dequeue_skb_slow(q, skb, packets);
293*4882a593Smuzhiyun 	}
294*4882a593Smuzhiyun trace:
295*4882a593Smuzhiyun 	trace_qdisc_dequeue(q, txq, *packets, skb);
296*4882a593Smuzhiyun 	return skb;
297*4882a593Smuzhiyun }
298*4882a593Smuzhiyun 
299*4882a593Smuzhiyun /*
300*4882a593Smuzhiyun  * Transmit possibly several skbs, and handle the return status as
301*4882a593Smuzhiyun  * required. Holding the running seqcount guarantees that
302*4882a593Smuzhiyun  * only one CPU can execute this function at a time.
303*4882a593Smuzhiyun  *
304*4882a593Smuzhiyun  * Returns to the caller:
305*4882a593Smuzhiyun  *				false  - hardware queue frozen; back off
306*4882a593Smuzhiyun  *				true   - feel free to send more pkts
307*4882a593Smuzhiyun  */
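/* Locking sketch: root_lock is NULL for TCQ_F_NOLOCK qdiscs; when it is
 * given, it is dropped here while the skb list is validated and the
 * driver transmits, and re-taken before qdisc state is touched again,
 * so dev_hard_start_xmit() runs outside the qdisc root lock.
 */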
308*4882a593Smuzhiyun bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
309*4882a593Smuzhiyun 		     struct net_device *dev, struct netdev_queue *txq,
310*4882a593Smuzhiyun 		     spinlock_t *root_lock, bool validate)
311*4882a593Smuzhiyun {
312*4882a593Smuzhiyun 	int ret = NETDEV_TX_BUSY;
313*4882a593Smuzhiyun 	bool again = false;
314*4882a593Smuzhiyun 
315*4882a593Smuzhiyun 	/* And release qdisc */
316*4882a593Smuzhiyun 	if (root_lock)
317*4882a593Smuzhiyun 		spin_unlock(root_lock);
318*4882a593Smuzhiyun 
319*4882a593Smuzhiyun 	/* Note that we validate skb (GSO, checksum, ...) outside of locks */
320*4882a593Smuzhiyun 	if (validate)
321*4882a593Smuzhiyun 		skb = validate_xmit_skb_list(skb, dev, &again);
322*4882a593Smuzhiyun 
323*4882a593Smuzhiyun #ifdef CONFIG_XFRM_OFFLOAD
324*4882a593Smuzhiyun 	if (unlikely(again)) {
325*4882a593Smuzhiyun 		if (root_lock)
326*4882a593Smuzhiyun 			spin_lock(root_lock);
327*4882a593Smuzhiyun 
328*4882a593Smuzhiyun 		dev_requeue_skb(skb, q);
329*4882a593Smuzhiyun 		return false;
330*4882a593Smuzhiyun 	}
331*4882a593Smuzhiyun #endif
332*4882a593Smuzhiyun 
333*4882a593Smuzhiyun 	if (likely(skb)) {
334*4882a593Smuzhiyun 		HARD_TX_LOCK(dev, txq, smp_processor_id());
335*4882a593Smuzhiyun 		if (!netif_xmit_frozen_or_stopped(txq))
336*4882a593Smuzhiyun 			skb = dev_hard_start_xmit(skb, dev, txq, &ret);
337*4882a593Smuzhiyun 		else
338*4882a593Smuzhiyun 			qdisc_maybe_clear_missed(q, txq);
339*4882a593Smuzhiyun 
340*4882a593Smuzhiyun 		HARD_TX_UNLOCK(dev, txq);
341*4882a593Smuzhiyun 	} else {
342*4882a593Smuzhiyun 		if (root_lock)
343*4882a593Smuzhiyun 			spin_lock(root_lock);
344*4882a593Smuzhiyun 		return true;
345*4882a593Smuzhiyun 	}
346*4882a593Smuzhiyun 
347*4882a593Smuzhiyun 	if (root_lock)
348*4882a593Smuzhiyun 		spin_lock(root_lock);
349*4882a593Smuzhiyun 
350*4882a593Smuzhiyun 	if (!dev_xmit_complete(ret)) {
351*4882a593Smuzhiyun 		/* Driver returned NETDEV_TX_BUSY - requeue skb */
352*4882a593Smuzhiyun 		if (unlikely(ret != NETDEV_TX_BUSY))
353*4882a593Smuzhiyun 			net_warn_ratelimited("BUG %s code %d qlen %d\n",
354*4882a593Smuzhiyun 					     dev->name, ret, q->q.qlen);
355*4882a593Smuzhiyun 
356*4882a593Smuzhiyun 		dev_requeue_skb(skb, q);
357*4882a593Smuzhiyun 		return false;
358*4882a593Smuzhiyun 	}
359*4882a593Smuzhiyun 
360*4882a593Smuzhiyun 	return true;
361*4882a593Smuzhiyun }
362*4882a593Smuzhiyun 
363*4882a593Smuzhiyun /*
364*4882a593Smuzhiyun  * NOTE: Called under qdisc_lock(q) with locally disabled BH.
365*4882a593Smuzhiyun  *
366*4882a593Smuzhiyun  * running seqcount guarantees only one CPU can process
367*4882a593Smuzhiyun  * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
368*4882a593Smuzhiyun  * this queue.
369*4882a593Smuzhiyun  *
370*4882a593Smuzhiyun  *  netif_tx_lock serializes accesses to device driver.
371*4882a593Smuzhiyun  *
372*4882a593Smuzhiyun  *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
373*4882a593Smuzhiyun  *  if one is grabbed, another must be free.
374*4882a593Smuzhiyun  *
375*4882a593Smuzhiyun  * Note that this procedure can be called by a watchdog timer
376*4882a593Smuzhiyun  *
377*4882a593Smuzhiyun  * Returns to the caller:
378*4882a593Smuzhiyun  *				false - queue is empty or throttled.
379*4882a593Smuzhiyun  *				true  - queue is not empty.
380*4882a593Smuzhiyun  *
381*4882a593Smuzhiyun  */
382*4882a593Smuzhiyun static inline bool qdisc_restart(struct Qdisc *q, int *packets)
383*4882a593Smuzhiyun {
384*4882a593Smuzhiyun 	spinlock_t *root_lock = NULL;
385*4882a593Smuzhiyun 	struct netdev_queue *txq;
386*4882a593Smuzhiyun 	struct net_device *dev;
387*4882a593Smuzhiyun 	struct sk_buff *skb;
388*4882a593Smuzhiyun 	bool validate;
389*4882a593Smuzhiyun 
390*4882a593Smuzhiyun 	/* Dequeue packet */
391*4882a593Smuzhiyun 	skb = dequeue_skb(q, &validate, packets);
392*4882a593Smuzhiyun 	if (unlikely(!skb))
393*4882a593Smuzhiyun 		return false;
394*4882a593Smuzhiyun 
395*4882a593Smuzhiyun 	if (!(q->flags & TCQ_F_NOLOCK))
396*4882a593Smuzhiyun 		root_lock = qdisc_lock(q);
397*4882a593Smuzhiyun 
398*4882a593Smuzhiyun 	dev = qdisc_dev(q);
399*4882a593Smuzhiyun 	txq = skb_get_tx_queue(dev, skb);
400*4882a593Smuzhiyun 
401*4882a593Smuzhiyun 	return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
402*4882a593Smuzhiyun }
403*4882a593Smuzhiyun 
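/* __qdisc_run() below drains the qdisc under a packet quota taken from
 * dev_tx_weight (normally derived from the net.core.dev_weight sysctl,
 * if that knob is wired the usual way - an assumption here); once the
 * quota is spent, remaining work is deferred to net_tx_action() via
 * __netif_schedule() instead of monopolizing this CPU.
 */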
404*4882a593Smuzhiyun void __qdisc_run(struct Qdisc *q)
405*4882a593Smuzhiyun {
406*4882a593Smuzhiyun 	int quota = READ_ONCE(dev_tx_weight);
407*4882a593Smuzhiyun 	int packets;
408*4882a593Smuzhiyun 
409*4882a593Smuzhiyun 	while (qdisc_restart(q, &packets)) {
410*4882a593Smuzhiyun 		quota -= packets;
411*4882a593Smuzhiyun 		if (quota <= 0) {
412*4882a593Smuzhiyun 			__netif_schedule(q);
413*4882a593Smuzhiyun 			break;
414*4882a593Smuzhiyun 		}
415*4882a593Smuzhiyun 	}
416*4882a593Smuzhiyun }
417*4882a593Smuzhiyun 
418*4882a593Smuzhiyun unsigned long dev_trans_start(struct net_device *dev)
419*4882a593Smuzhiyun {
420*4882a593Smuzhiyun 	unsigned long val, res;
421*4882a593Smuzhiyun 	unsigned int i;
422*4882a593Smuzhiyun 
423*4882a593Smuzhiyun 	if (is_vlan_dev(dev))
424*4882a593Smuzhiyun 		dev = vlan_dev_real_dev(dev);
425*4882a593Smuzhiyun 	else if (netif_is_macvlan(dev))
426*4882a593Smuzhiyun 		dev = macvlan_dev_real_dev(dev);
427*4882a593Smuzhiyun 	res = netdev_get_tx_queue(dev, 0)->trans_start;
428*4882a593Smuzhiyun 	for (i = 1; i < dev->num_tx_queues; i++) {
429*4882a593Smuzhiyun 		val = netdev_get_tx_queue(dev, i)->trans_start;
430*4882a593Smuzhiyun 		if (val && time_after(val, res))
431*4882a593Smuzhiyun 			res = val;
432*4882a593Smuzhiyun 	}
433*4882a593Smuzhiyun 
434*4882a593Smuzhiyun 	return res;
435*4882a593Smuzhiyun }
436*4882a593Smuzhiyun EXPORT_SYMBOL(dev_trans_start);
437*4882a593Smuzhiyun 
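/* dev_watchdog() below is the tx hang detector: it runs from
 * dev->watchdog_timer and, if some stopped tx queue has seen no progress
 * within dev->watchdog_timeo jiffies of its last trans_start, it calls
 * the driver's ndo_tx_timeout() and rearms itself.
 */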
438*4882a593Smuzhiyun static void dev_watchdog(struct timer_list *t)
439*4882a593Smuzhiyun {
440*4882a593Smuzhiyun 	struct net_device *dev = from_timer(dev, t, watchdog_timer);
441*4882a593Smuzhiyun 
442*4882a593Smuzhiyun 	netif_tx_lock(dev);
443*4882a593Smuzhiyun 	if (!qdisc_tx_is_noop(dev)) {
444*4882a593Smuzhiyun 		if (netif_device_present(dev) &&
445*4882a593Smuzhiyun 		    netif_running(dev) &&
446*4882a593Smuzhiyun 		    netif_carrier_ok(dev)) {
447*4882a593Smuzhiyun 			int some_queue_timedout = 0;
448*4882a593Smuzhiyun 			unsigned int i;
449*4882a593Smuzhiyun 			unsigned long trans_start;
450*4882a593Smuzhiyun 
451*4882a593Smuzhiyun 			for (i = 0; i < dev->num_tx_queues; i++) {
452*4882a593Smuzhiyun 				struct netdev_queue *txq;
453*4882a593Smuzhiyun 
454*4882a593Smuzhiyun 				txq = netdev_get_tx_queue(dev, i);
455*4882a593Smuzhiyun 				trans_start = txq->trans_start;
456*4882a593Smuzhiyun 				if (netif_xmit_stopped(txq) &&
457*4882a593Smuzhiyun 				    time_after(jiffies, (trans_start +
458*4882a593Smuzhiyun 							 dev->watchdog_timeo))) {
459*4882a593Smuzhiyun 					some_queue_timedout = 1;
460*4882a593Smuzhiyun 					txq->trans_timeout++;
461*4882a593Smuzhiyun 					break;
462*4882a593Smuzhiyun 				}
463*4882a593Smuzhiyun 			}
464*4882a593Smuzhiyun 
465*4882a593Smuzhiyun 			if (some_queue_timedout) {
466*4882a593Smuzhiyun 				trace_net_dev_xmit_timeout(dev, i);
467*4882a593Smuzhiyun 				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
468*4882a593Smuzhiyun 				       dev->name, netdev_drivername(dev), i);
469*4882a593Smuzhiyun 				dev->netdev_ops->ndo_tx_timeout(dev, i);
470*4882a593Smuzhiyun 			}
471*4882a593Smuzhiyun 			if (!mod_timer(&dev->watchdog_timer,
472*4882a593Smuzhiyun 				       round_jiffies(jiffies +
473*4882a593Smuzhiyun 						     dev->watchdog_timeo)))
474*4882a593Smuzhiyun 				dev_hold(dev);
475*4882a593Smuzhiyun 		}
476*4882a593Smuzhiyun 	}
477*4882a593Smuzhiyun 	netif_tx_unlock(dev);
478*4882a593Smuzhiyun 
479*4882a593Smuzhiyun 	dev_put(dev);
480*4882a593Smuzhiyun }
481*4882a593Smuzhiyun 
482*4882a593Smuzhiyun void __netdev_watchdog_up(struct net_device *dev)
483*4882a593Smuzhiyun {
484*4882a593Smuzhiyun 	if (dev->netdev_ops->ndo_tx_timeout) {
485*4882a593Smuzhiyun 		if (dev->watchdog_timeo <= 0)
486*4882a593Smuzhiyun 			dev->watchdog_timeo = 5*HZ;
487*4882a593Smuzhiyun 		if (!mod_timer(&dev->watchdog_timer,
488*4882a593Smuzhiyun 			       round_jiffies(jiffies + dev->watchdog_timeo)))
489*4882a593Smuzhiyun 			dev_hold(dev);
490*4882a593Smuzhiyun 	}
491*4882a593Smuzhiyun }
492*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(__netdev_watchdog_up);
493*4882a593Smuzhiyun 
494*4882a593Smuzhiyun static void dev_watchdog_up(struct net_device *dev)
495*4882a593Smuzhiyun {
496*4882a593Smuzhiyun 	__netdev_watchdog_up(dev);
497*4882a593Smuzhiyun }
498*4882a593Smuzhiyun 
499*4882a593Smuzhiyun static void dev_watchdog_down(struct net_device *dev)
500*4882a593Smuzhiyun {
501*4882a593Smuzhiyun 	netif_tx_lock_bh(dev);
502*4882a593Smuzhiyun 	if (del_timer(&dev->watchdog_timer))
503*4882a593Smuzhiyun 		dev_put(dev);
504*4882a593Smuzhiyun 	netif_tx_unlock_bh(dev);
505*4882a593Smuzhiyun }
506*4882a593Smuzhiyun 
507*4882a593Smuzhiyun /**
508*4882a593Smuzhiyun  *	netif_carrier_on - set carrier
509*4882a593Smuzhiyun  *	@dev: network device
510*4882a593Smuzhiyun  *
511*4882a593Smuzhiyun  * Device has detected acquisition of carrier.
512*4882a593Smuzhiyun  */
513*4882a593Smuzhiyun void netif_carrier_on(struct net_device *dev)
514*4882a593Smuzhiyun {
515*4882a593Smuzhiyun 	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
516*4882a593Smuzhiyun 		if (dev->reg_state == NETREG_UNINITIALIZED)
517*4882a593Smuzhiyun 			return;
518*4882a593Smuzhiyun 		atomic_inc(&dev->carrier_up_count);
519*4882a593Smuzhiyun 		linkwatch_fire_event(dev);
520*4882a593Smuzhiyun 		if (netif_running(dev))
521*4882a593Smuzhiyun 			__netdev_watchdog_up(dev);
522*4882a593Smuzhiyun 	}
523*4882a593Smuzhiyun }
524*4882a593Smuzhiyun EXPORT_SYMBOL(netif_carrier_on);
525*4882a593Smuzhiyun 
526*4882a593Smuzhiyun /**
527*4882a593Smuzhiyun  *	netif_carrier_off - clear carrier
528*4882a593Smuzhiyun  *	@dev: network device
529*4882a593Smuzhiyun  *
530*4882a593Smuzhiyun  * Device has detected loss of carrier.
531*4882a593Smuzhiyun  */
532*4882a593Smuzhiyun void netif_carrier_off(struct net_device *dev)
533*4882a593Smuzhiyun {
534*4882a593Smuzhiyun 	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
535*4882a593Smuzhiyun 		if (dev->reg_state == NETREG_UNINITIALIZED)
536*4882a593Smuzhiyun 			return;
537*4882a593Smuzhiyun 		atomic_inc(&dev->carrier_down_count);
538*4882a593Smuzhiyun 		linkwatch_fire_event(dev);
539*4882a593Smuzhiyun 	}
540*4882a593Smuzhiyun }
541*4882a593Smuzhiyun EXPORT_SYMBOL(netif_carrier_off);
542*4882a593Smuzhiyun 
543*4882a593Smuzhiyun /* "NOOP" scheduler: the best scheduler, recommended for all interfaces
544*4882a593Smuzhiyun    under all circumstances. It is difficult to invent anything faster or
545*4882a593Smuzhiyun    cheaper.
546*4882a593Smuzhiyun  */
547*4882a593Smuzhiyun 
548*4882a593Smuzhiyun static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
549*4882a593Smuzhiyun 			struct sk_buff **to_free)
550*4882a593Smuzhiyun {
551*4882a593Smuzhiyun 	__qdisc_drop(skb, to_free);
552*4882a593Smuzhiyun 	return NET_XMIT_CN;
553*4882a593Smuzhiyun }
554*4882a593Smuzhiyun 
555*4882a593Smuzhiyun static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
556*4882a593Smuzhiyun {
557*4882a593Smuzhiyun 	return NULL;
558*4882a593Smuzhiyun }
559*4882a593Smuzhiyun 
560*4882a593Smuzhiyun struct Qdisc_ops noop_qdisc_ops __read_mostly = {
561*4882a593Smuzhiyun 	.id		=	"noop",
562*4882a593Smuzhiyun 	.priv_size	=	0,
563*4882a593Smuzhiyun 	.enqueue	=	noop_enqueue,
564*4882a593Smuzhiyun 	.dequeue	=	noop_dequeue,
565*4882a593Smuzhiyun 	.peek		=	noop_dequeue,
566*4882a593Smuzhiyun 	.owner		=	THIS_MODULE,
567*4882a593Smuzhiyun };
568*4882a593Smuzhiyun 
569*4882a593Smuzhiyun static struct netdev_queue noop_netdev_queue = {
570*4882a593Smuzhiyun 	RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc),
571*4882a593Smuzhiyun 	.qdisc_sleeping	=	&noop_qdisc,
572*4882a593Smuzhiyun };
573*4882a593Smuzhiyun 
574*4882a593Smuzhiyun struct Qdisc noop_qdisc = {
575*4882a593Smuzhiyun 	.enqueue	=	noop_enqueue,
576*4882a593Smuzhiyun 	.dequeue	=	noop_dequeue,
577*4882a593Smuzhiyun 	.flags		=	TCQ_F_BUILTIN,
578*4882a593Smuzhiyun 	.ops		=	&noop_qdisc_ops,
579*4882a593Smuzhiyun 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
580*4882a593Smuzhiyun 	.dev_queue	=	&noop_netdev_queue,
581*4882a593Smuzhiyun 	.running	=	SEQCNT_ZERO(noop_qdisc.running),
582*4882a593Smuzhiyun 	.busylock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
583*4882a593Smuzhiyun 	.gso_skb = {
584*4882a593Smuzhiyun 		.next = (struct sk_buff *)&noop_qdisc.gso_skb,
585*4882a593Smuzhiyun 		.prev = (struct sk_buff *)&noop_qdisc.gso_skb,
586*4882a593Smuzhiyun 		.qlen = 0,
587*4882a593Smuzhiyun 		.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.gso_skb.lock),
588*4882a593Smuzhiyun 	},
589*4882a593Smuzhiyun 	.skb_bad_txq = {
590*4882a593Smuzhiyun 		.next = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
591*4882a593Smuzhiyun 		.prev = (struct sk_buff *)&noop_qdisc.skb_bad_txq,
592*4882a593Smuzhiyun 		.qlen = 0,
593*4882a593Smuzhiyun 		.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.skb_bad_txq.lock),
594*4882a593Smuzhiyun 	},
595*4882a593Smuzhiyun };
596*4882a593Smuzhiyun EXPORT_SYMBOL(noop_qdisc);
597*4882a593Smuzhiyun 
598*4882a593Smuzhiyun static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt,
599*4882a593Smuzhiyun 			struct netlink_ext_ack *extack)
600*4882a593Smuzhiyun {
601*4882a593Smuzhiyun 	/* register_qdisc() assigns a default of noop_enqueue if unset,
602*4882a593Smuzhiyun 	 * but __dev_queue_xmit() treats noqueue only as such
603*4882a593Smuzhiyun 	 * if this is NULL - so clear it here. */
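	/* Put informally: a NULL ->enqueue is the signal __dev_queue_xmit()
	 * uses to hand skbs straight to the driver with no queueing at all.
	 */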
604*4882a593Smuzhiyun 	qdisc->enqueue = NULL;
605*4882a593Smuzhiyun 	return 0;
606*4882a593Smuzhiyun }
607*4882a593Smuzhiyun 
608*4882a593Smuzhiyun struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
609*4882a593Smuzhiyun 	.id		=	"noqueue",
610*4882a593Smuzhiyun 	.priv_size	=	0,
611*4882a593Smuzhiyun 	.init		=	noqueue_init,
612*4882a593Smuzhiyun 	.enqueue	=	noop_enqueue,
613*4882a593Smuzhiyun 	.dequeue	=	noop_dequeue,
614*4882a593Smuzhiyun 	.peek		=	noop_dequeue,
615*4882a593Smuzhiyun 	.owner		=	THIS_MODULE,
616*4882a593Smuzhiyun };
617*4882a593Smuzhiyun 
618*4882a593Smuzhiyun static const u8 prio2band[TC_PRIO_MAX + 1] = {
619*4882a593Smuzhiyun 	1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1
620*4882a593Smuzhiyun };
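/* Worked example of the table above (band 0 is dequeued first):
 *	skb->priority == TC_PRIO_CONTROL (7)    -> band 0
 *	skb->priority == TC_PRIO_BESTEFFORT (0) -> band 1
 *	skb->priority == TC_PRIO_BULK (2)       -> band 2
 */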
621*4882a593Smuzhiyun 
622*4882a593Smuzhiyun /* 3-band FIFO queue: old style, but should be a bit faster than
623*4882a593Smuzhiyun    generic prio+fifo combination.
624*4882a593Smuzhiyun  */
625*4882a593Smuzhiyun 
626*4882a593Smuzhiyun #define PFIFO_FAST_BANDS 3
627*4882a593Smuzhiyun 
628*4882a593Smuzhiyun /*
629*4882a593Smuzhiyun  * Private data for a pfifo_fast scheduler containing:
630*4882a593Smuzhiyun  *	- rings for priority bands
631*4882a593Smuzhiyun  */
632*4882a593Smuzhiyun struct pfifo_fast_priv {
633*4882a593Smuzhiyun 	struct skb_array q[PFIFO_FAST_BANDS];
634*4882a593Smuzhiyun };
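/* Each band is a lockless skb_array (ptr_ring), sized in pfifo_fast_init()
 * to the device's tx_queue_len; this is what lets pfifo_fast run with
 * TCQ_F_NOLOCK | TCQ_F_CPUSTATS further down.
 */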
635*4882a593Smuzhiyun 
636*4882a593Smuzhiyun static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
637*4882a593Smuzhiyun 					  int band)
638*4882a593Smuzhiyun {
639*4882a593Smuzhiyun 	return &priv->q[band];
640*4882a593Smuzhiyun }
641*4882a593Smuzhiyun 
642*4882a593Smuzhiyun static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
643*4882a593Smuzhiyun 			      struct sk_buff **to_free)
644*4882a593Smuzhiyun {
645*4882a593Smuzhiyun 	int band = prio2band[skb->priority & TC_PRIO_MAX];
646*4882a593Smuzhiyun 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
647*4882a593Smuzhiyun 	struct skb_array *q = band2list(priv, band);
648*4882a593Smuzhiyun 	unsigned int pkt_len = qdisc_pkt_len(skb);
649*4882a593Smuzhiyun 	int err;
650*4882a593Smuzhiyun 
651*4882a593Smuzhiyun 	err = skb_array_produce(q, skb);
652*4882a593Smuzhiyun 
653*4882a593Smuzhiyun 	if (unlikely(err)) {
654*4882a593Smuzhiyun 		if (qdisc_is_percpu_stats(qdisc))
655*4882a593Smuzhiyun 			return qdisc_drop_cpu(skb, qdisc, to_free);
656*4882a593Smuzhiyun 		else
657*4882a593Smuzhiyun 			return qdisc_drop(skb, qdisc, to_free);
658*4882a593Smuzhiyun 	}
659*4882a593Smuzhiyun 
660*4882a593Smuzhiyun 	qdisc_update_stats_at_enqueue(qdisc, pkt_len);
661*4882a593Smuzhiyun 	return NET_XMIT_SUCCESS;
662*4882a593Smuzhiyun }
663*4882a593Smuzhiyun 
664*4882a593Smuzhiyun static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
665*4882a593Smuzhiyun {
666*4882a593Smuzhiyun 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
667*4882a593Smuzhiyun 	struct sk_buff *skb = NULL;
668*4882a593Smuzhiyun 	bool need_retry = true;
669*4882a593Smuzhiyun 	int band;
670*4882a593Smuzhiyun 
671*4882a593Smuzhiyun retry:
672*4882a593Smuzhiyun 	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
673*4882a593Smuzhiyun 		struct skb_array *q = band2list(priv, band);
674*4882a593Smuzhiyun 
675*4882a593Smuzhiyun 		if (__skb_array_empty(q))
676*4882a593Smuzhiyun 			continue;
677*4882a593Smuzhiyun 
678*4882a593Smuzhiyun 		skb = __skb_array_consume(q);
679*4882a593Smuzhiyun 	}
680*4882a593Smuzhiyun 	if (likely(skb)) {
681*4882a593Smuzhiyun 		qdisc_update_stats_at_dequeue(qdisc, skb);
682*4882a593Smuzhiyun 	} else if (need_retry &&
683*4882a593Smuzhiyun 		   test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
684*4882a593Smuzhiyun 		/* Delay clearing the STATE_MISSED here to reduce
685*4882a593Smuzhiyun 		 * the overhead of the second spin_trylock() in
686*4882a593Smuzhiyun 		 * qdisc_run_begin() and __netif_schedule() calling
687*4882a593Smuzhiyun 		 * in qdisc_run_end().
688*4882a593Smuzhiyun 		 */
689*4882a593Smuzhiyun 		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
690*4882a593Smuzhiyun 
691*4882a593Smuzhiyun 		/* Make sure dequeuing happens after clearing
692*4882a593Smuzhiyun 		 * STATE_MISSED.
693*4882a593Smuzhiyun 		 */
694*4882a593Smuzhiyun 		smp_mb__after_atomic();
695*4882a593Smuzhiyun 
696*4882a593Smuzhiyun 		need_retry = false;
697*4882a593Smuzhiyun 
698*4882a593Smuzhiyun 		goto retry;
699*4882a593Smuzhiyun 	} else {
700*4882a593Smuzhiyun 		WRITE_ONCE(qdisc->empty, true);
701*4882a593Smuzhiyun 	}
702*4882a593Smuzhiyun 
703*4882a593Smuzhiyun 	return skb;
704*4882a593Smuzhiyun }
705*4882a593Smuzhiyun 
706*4882a593Smuzhiyun static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
707*4882a593Smuzhiyun {
708*4882a593Smuzhiyun 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
709*4882a593Smuzhiyun 	struct sk_buff *skb = NULL;
710*4882a593Smuzhiyun 	int band;
711*4882a593Smuzhiyun 
712*4882a593Smuzhiyun 	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
713*4882a593Smuzhiyun 		struct skb_array *q = band2list(priv, band);
714*4882a593Smuzhiyun 
715*4882a593Smuzhiyun 		skb = __skb_array_peek(q);
716*4882a593Smuzhiyun 	}
717*4882a593Smuzhiyun 
718*4882a593Smuzhiyun 	return skb;
719*4882a593Smuzhiyun }
720*4882a593Smuzhiyun 
721*4882a593Smuzhiyun static void pfifo_fast_reset(struct Qdisc *qdisc)
722*4882a593Smuzhiyun {
723*4882a593Smuzhiyun 	int i, band;
724*4882a593Smuzhiyun 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
725*4882a593Smuzhiyun 
726*4882a593Smuzhiyun 	for (band = 0; band < PFIFO_FAST_BANDS; band++) {
727*4882a593Smuzhiyun 		struct skb_array *q = band2list(priv, band);
728*4882a593Smuzhiyun 		struct sk_buff *skb;
729*4882a593Smuzhiyun 
730*4882a593Smuzhiyun 		/* NULL ring is possible if destroy path is due to a failed
731*4882a593Smuzhiyun 		 * skb_array_init() in pfifo_fast_init() case.
732*4882a593Smuzhiyun 		 */
733*4882a593Smuzhiyun 		if (!q->ring.queue)
734*4882a593Smuzhiyun 			continue;
735*4882a593Smuzhiyun 
736*4882a593Smuzhiyun 		while ((skb = __skb_array_consume(q)) != NULL)
737*4882a593Smuzhiyun 			kfree_skb(skb);
738*4882a593Smuzhiyun 	}
739*4882a593Smuzhiyun 
740*4882a593Smuzhiyun 	if (qdisc_is_percpu_stats(qdisc)) {
741*4882a593Smuzhiyun 		for_each_possible_cpu(i) {
742*4882a593Smuzhiyun 			struct gnet_stats_queue *q;
743*4882a593Smuzhiyun 
744*4882a593Smuzhiyun 			q = per_cpu_ptr(qdisc->cpu_qstats, i);
745*4882a593Smuzhiyun 			q->backlog = 0;
746*4882a593Smuzhiyun 			q->qlen = 0;
747*4882a593Smuzhiyun 		}
748*4882a593Smuzhiyun 	}
749*4882a593Smuzhiyun }
750*4882a593Smuzhiyun 
751*4882a593Smuzhiyun static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
752*4882a593Smuzhiyun {
753*4882a593Smuzhiyun 	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
754*4882a593Smuzhiyun 
755*4882a593Smuzhiyun 	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
756*4882a593Smuzhiyun 	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
757*4882a593Smuzhiyun 		goto nla_put_failure;
758*4882a593Smuzhiyun 	return skb->len;
759*4882a593Smuzhiyun 
760*4882a593Smuzhiyun nla_put_failure:
761*4882a593Smuzhiyun 	return -1;
762*4882a593Smuzhiyun }
763*4882a593Smuzhiyun 
764*4882a593Smuzhiyun static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt,
765*4882a593Smuzhiyun 			   struct netlink_ext_ack *extack)
766*4882a593Smuzhiyun {
767*4882a593Smuzhiyun 	unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len;
768*4882a593Smuzhiyun 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
769*4882a593Smuzhiyun 	int prio;
770*4882a593Smuzhiyun 
771*4882a593Smuzhiyun 	/* guard against zero length rings */
772*4882a593Smuzhiyun 	if (!qlen)
773*4882a593Smuzhiyun 		return -EINVAL;
774*4882a593Smuzhiyun 
775*4882a593Smuzhiyun 	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
776*4882a593Smuzhiyun 		struct skb_array *q = band2list(priv, prio);
777*4882a593Smuzhiyun 		int err;
778*4882a593Smuzhiyun 
779*4882a593Smuzhiyun 		err = skb_array_init(q, qlen, GFP_KERNEL);
780*4882a593Smuzhiyun 		if (err)
781*4882a593Smuzhiyun 			return -ENOMEM;
782*4882a593Smuzhiyun 	}
783*4882a593Smuzhiyun 
784*4882a593Smuzhiyun 	/* Can by-pass the queue discipline */
785*4882a593Smuzhiyun 	qdisc->flags |= TCQ_F_CAN_BYPASS;
786*4882a593Smuzhiyun 	return 0;
787*4882a593Smuzhiyun }
788*4882a593Smuzhiyun 
789*4882a593Smuzhiyun static void pfifo_fast_destroy(struct Qdisc *sch)
790*4882a593Smuzhiyun {
791*4882a593Smuzhiyun 	struct pfifo_fast_priv *priv = qdisc_priv(sch);
792*4882a593Smuzhiyun 	int prio;
793*4882a593Smuzhiyun 
794*4882a593Smuzhiyun 	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
795*4882a593Smuzhiyun 		struct skb_array *q = band2list(priv, prio);
796*4882a593Smuzhiyun 
797*4882a593Smuzhiyun 		/* NULL ring is possible if destroy path is due to a failed
798*4882a593Smuzhiyun 		 * skb_array_init() in pfifo_fast_init() case.
799*4882a593Smuzhiyun 		 */
800*4882a593Smuzhiyun 		if (!q->ring.queue)
801*4882a593Smuzhiyun 			continue;
802*4882a593Smuzhiyun 		/* Destroy ring but no need to kfree_skb because a call to
803*4882a593Smuzhiyun 		 * pfifo_fast_reset() has already done that work.
804*4882a593Smuzhiyun 		 */
805*4882a593Smuzhiyun 		ptr_ring_cleanup(&q->ring, NULL);
806*4882a593Smuzhiyun 	}
807*4882a593Smuzhiyun }
808*4882a593Smuzhiyun 
809*4882a593Smuzhiyun static int pfifo_fast_change_tx_queue_len(struct Qdisc *sch,
810*4882a593Smuzhiyun 					  unsigned int new_len)
811*4882a593Smuzhiyun {
812*4882a593Smuzhiyun 	struct pfifo_fast_priv *priv = qdisc_priv(sch);
813*4882a593Smuzhiyun 	struct skb_array *bands[PFIFO_FAST_BANDS];
814*4882a593Smuzhiyun 	int prio;
815*4882a593Smuzhiyun 
816*4882a593Smuzhiyun 	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
817*4882a593Smuzhiyun 		struct skb_array *q = band2list(priv, prio);
818*4882a593Smuzhiyun 
819*4882a593Smuzhiyun 		bands[prio] = q;
820*4882a593Smuzhiyun 	}
821*4882a593Smuzhiyun 
822*4882a593Smuzhiyun 	return skb_array_resize_multiple(bands, PFIFO_FAST_BANDS, new_len,
823*4882a593Smuzhiyun 					 GFP_KERNEL);
824*4882a593Smuzhiyun }
825*4882a593Smuzhiyun 
826*4882a593Smuzhiyun struct Qdisc_ops pfifo_fast_ops __read_mostly = {
827*4882a593Smuzhiyun 	.id		=	"pfifo_fast",
828*4882a593Smuzhiyun 	.priv_size	=	sizeof(struct pfifo_fast_priv),
829*4882a593Smuzhiyun 	.enqueue	=	pfifo_fast_enqueue,
830*4882a593Smuzhiyun 	.dequeue	=	pfifo_fast_dequeue,
831*4882a593Smuzhiyun 	.peek		=	pfifo_fast_peek,
832*4882a593Smuzhiyun 	.init		=	pfifo_fast_init,
833*4882a593Smuzhiyun 	.destroy	=	pfifo_fast_destroy,
834*4882a593Smuzhiyun 	.reset		=	pfifo_fast_reset,
835*4882a593Smuzhiyun 	.dump		=	pfifo_fast_dump,
836*4882a593Smuzhiyun 	.change_tx_queue_len =  pfifo_fast_change_tx_queue_len,
837*4882a593Smuzhiyun 	.owner		=	THIS_MODULE,
838*4882a593Smuzhiyun 	.static_flags	=	TCQ_F_NOLOCK | TCQ_F_CPUSTATS,
839*4882a593Smuzhiyun };
840*4882a593Smuzhiyun EXPORT_SYMBOL(pfifo_fast_ops);
841*4882a593Smuzhiyun 
842*4882a593Smuzhiyun static struct lock_class_key qdisc_tx_busylock;
843*4882a593Smuzhiyun static struct lock_class_key qdisc_running_key;
844*4882a593Smuzhiyun 
845*4882a593Smuzhiyun struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
846*4882a593Smuzhiyun 			  const struct Qdisc_ops *ops,
847*4882a593Smuzhiyun 			  struct netlink_ext_ack *extack)
848*4882a593Smuzhiyun {
849*4882a593Smuzhiyun 	struct Qdisc *sch;
850*4882a593Smuzhiyun 	unsigned int size = sizeof(*sch) + ops->priv_size;
851*4882a593Smuzhiyun 	int err = -ENOBUFS;
852*4882a593Smuzhiyun 	struct net_device *dev;
853*4882a593Smuzhiyun 
854*4882a593Smuzhiyun 	if (!dev_queue) {
855*4882a593Smuzhiyun 		NL_SET_ERR_MSG(extack, "No device queue given");
856*4882a593Smuzhiyun 		err = -EINVAL;
857*4882a593Smuzhiyun 		goto errout;
858*4882a593Smuzhiyun 	}
859*4882a593Smuzhiyun 
860*4882a593Smuzhiyun 	dev = dev_queue->dev;
861*4882a593Smuzhiyun 	sch = kzalloc_node(size, GFP_KERNEL, netdev_queue_numa_node_read(dev_queue));
862*4882a593Smuzhiyun 
863*4882a593Smuzhiyun 	if (!sch)
864*4882a593Smuzhiyun 		goto errout;
865*4882a593Smuzhiyun 	__skb_queue_head_init(&sch->gso_skb);
866*4882a593Smuzhiyun 	__skb_queue_head_init(&sch->skb_bad_txq);
867*4882a593Smuzhiyun 	qdisc_skb_head_init(&sch->q);
868*4882a593Smuzhiyun 	spin_lock_init(&sch->q.lock);
869*4882a593Smuzhiyun 
870*4882a593Smuzhiyun 	if (ops->static_flags & TCQ_F_CPUSTATS) {
871*4882a593Smuzhiyun 		sch->cpu_bstats =
872*4882a593Smuzhiyun 			netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
873*4882a593Smuzhiyun 		if (!sch->cpu_bstats)
874*4882a593Smuzhiyun 			goto errout1;
875*4882a593Smuzhiyun 
876*4882a593Smuzhiyun 		sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
877*4882a593Smuzhiyun 		if (!sch->cpu_qstats) {
878*4882a593Smuzhiyun 			free_percpu(sch->cpu_bstats);
879*4882a593Smuzhiyun 			goto errout1;
880*4882a593Smuzhiyun 		}
881*4882a593Smuzhiyun 	}
882*4882a593Smuzhiyun 
883*4882a593Smuzhiyun 	spin_lock_init(&sch->busylock);
884*4882a593Smuzhiyun 	lockdep_set_class(&sch->busylock,
885*4882a593Smuzhiyun 			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
886*4882a593Smuzhiyun 
887*4882a593Smuzhiyun 	/* seqlock has the same scope as busylock, for NOLOCK qdiscs */
888*4882a593Smuzhiyun 	spin_lock_init(&sch->seqlock);
889*4882a593Smuzhiyun 	lockdep_set_class(&sch->seqlock,
890*4882a593Smuzhiyun 			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
891*4882a593Smuzhiyun 
892*4882a593Smuzhiyun 	seqcount_init(&sch->running);
893*4882a593Smuzhiyun 	lockdep_set_class(&sch->running,
894*4882a593Smuzhiyun 			  dev->qdisc_running_key ?: &qdisc_running_key);
895*4882a593Smuzhiyun 
896*4882a593Smuzhiyun 	sch->ops = ops;
897*4882a593Smuzhiyun 	sch->flags = ops->static_flags;
898*4882a593Smuzhiyun 	sch->enqueue = ops->enqueue;
899*4882a593Smuzhiyun 	sch->dequeue = ops->dequeue;
900*4882a593Smuzhiyun 	sch->dev_queue = dev_queue;
901*4882a593Smuzhiyun 	sch->empty = true;
902*4882a593Smuzhiyun 	dev_hold(dev);
903*4882a593Smuzhiyun 	refcount_set(&sch->refcnt, 1);
904*4882a593Smuzhiyun 
905*4882a593Smuzhiyun 	return sch;
906*4882a593Smuzhiyun errout1:
907*4882a593Smuzhiyun 	kfree(sch);
908*4882a593Smuzhiyun errout:
909*4882a593Smuzhiyun 	return ERR_PTR(err);
910*4882a593Smuzhiyun }
911*4882a593Smuzhiyun 
912*4882a593Smuzhiyun struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
913*4882a593Smuzhiyun 				const struct Qdisc_ops *ops,
914*4882a593Smuzhiyun 				unsigned int parentid,
915*4882a593Smuzhiyun 				struct netlink_ext_ack *extack)
916*4882a593Smuzhiyun {
917*4882a593Smuzhiyun 	struct Qdisc *sch;
918*4882a593Smuzhiyun 
919*4882a593Smuzhiyun 	if (!try_module_get(ops->owner)) {
920*4882a593Smuzhiyun 		NL_SET_ERR_MSG(extack, "Failed to increase module reference counter");
921*4882a593Smuzhiyun 		return NULL;
922*4882a593Smuzhiyun 	}
923*4882a593Smuzhiyun 
924*4882a593Smuzhiyun 	sch = qdisc_alloc(dev_queue, ops, extack);
925*4882a593Smuzhiyun 	if (IS_ERR(sch)) {
926*4882a593Smuzhiyun 		module_put(ops->owner);
927*4882a593Smuzhiyun 		return NULL;
928*4882a593Smuzhiyun 	}
929*4882a593Smuzhiyun 	sch->parent = parentid;
930*4882a593Smuzhiyun 
931*4882a593Smuzhiyun 	if (!ops->init || ops->init(sch, NULL, extack) == 0) {
932*4882a593Smuzhiyun 		trace_qdisc_create(ops, dev_queue->dev, parentid);
933*4882a593Smuzhiyun 		return sch;
934*4882a593Smuzhiyun 	}
935*4882a593Smuzhiyun 
936*4882a593Smuzhiyun 	qdisc_put(sch);
937*4882a593Smuzhiyun 	return NULL;
938*4882a593Smuzhiyun }
939*4882a593Smuzhiyun EXPORT_SYMBOL(qdisc_create_dflt);
940*4882a593Smuzhiyun 
941*4882a593Smuzhiyun /* Under qdisc_lock(qdisc) and BH! */
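/* qdisc_reset() below also flushes anything parked on the requeue side
 * queues (gso_skb, skb_bad_txq) and zeroes qlen/backlog, on top of
 * invoking the qdisc's own ->reset() callback.
 */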
942*4882a593Smuzhiyun 
943*4882a593Smuzhiyun void qdisc_reset(struct Qdisc *qdisc)
944*4882a593Smuzhiyun {
945*4882a593Smuzhiyun 	const struct Qdisc_ops *ops = qdisc->ops;
946*4882a593Smuzhiyun 	struct sk_buff *skb, *tmp;
947*4882a593Smuzhiyun 
948*4882a593Smuzhiyun 	trace_qdisc_reset(qdisc);
949*4882a593Smuzhiyun 
950*4882a593Smuzhiyun 	if (ops->reset)
951*4882a593Smuzhiyun 		ops->reset(qdisc);
952*4882a593Smuzhiyun 
953*4882a593Smuzhiyun 	skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
954*4882a593Smuzhiyun 		__skb_unlink(skb, &qdisc->gso_skb);
955*4882a593Smuzhiyun 		kfree_skb_list(skb);
956*4882a593Smuzhiyun 	}
957*4882a593Smuzhiyun 
958*4882a593Smuzhiyun 	skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
959*4882a593Smuzhiyun 		__skb_unlink(skb, &qdisc->skb_bad_txq);
960*4882a593Smuzhiyun 		kfree_skb_list(skb);
961*4882a593Smuzhiyun 	}
962*4882a593Smuzhiyun 
963*4882a593Smuzhiyun 	qdisc->q.qlen = 0;
964*4882a593Smuzhiyun 	qdisc->qstats.backlog = 0;
965*4882a593Smuzhiyun }
966*4882a593Smuzhiyun EXPORT_SYMBOL(qdisc_reset);
967*4882a593Smuzhiyun 
968*4882a593Smuzhiyun void qdisc_free(struct Qdisc *qdisc)
969*4882a593Smuzhiyun {
970*4882a593Smuzhiyun 	if (qdisc_is_percpu_stats(qdisc)) {
971*4882a593Smuzhiyun 		free_percpu(qdisc->cpu_bstats);
972*4882a593Smuzhiyun 		free_percpu(qdisc->cpu_qstats);
973*4882a593Smuzhiyun 	}
974*4882a593Smuzhiyun 
975*4882a593Smuzhiyun 	kfree(qdisc);
976*4882a593Smuzhiyun }
977*4882a593Smuzhiyun 
978*4882a593Smuzhiyun static void qdisc_free_cb(struct rcu_head *head)
979*4882a593Smuzhiyun {
980*4882a593Smuzhiyun 	struct Qdisc *q = container_of(head, struct Qdisc, rcu);
981*4882a593Smuzhiyun 
982*4882a593Smuzhiyun 	qdisc_free(q);
983*4882a593Smuzhiyun }
984*4882a593Smuzhiyun 
985*4882a593Smuzhiyun static void qdisc_destroy(struct Qdisc *qdisc)
986*4882a593Smuzhiyun {
987*4882a593Smuzhiyun 	const struct Qdisc_ops  *ops = qdisc->ops;
988*4882a593Smuzhiyun 
989*4882a593Smuzhiyun #ifdef CONFIG_NET_SCHED
990*4882a593Smuzhiyun 	qdisc_hash_del(qdisc);
991*4882a593Smuzhiyun 
992*4882a593Smuzhiyun 	qdisc_put_stab(rtnl_dereference(qdisc->stab));
993*4882a593Smuzhiyun #endif
994*4882a593Smuzhiyun 	gen_kill_estimator(&qdisc->rate_est);
995*4882a593Smuzhiyun 
996*4882a593Smuzhiyun 	qdisc_reset(qdisc);
997*4882a593Smuzhiyun 
998*4882a593Smuzhiyun 	if (ops->destroy)
999*4882a593Smuzhiyun 		ops->destroy(qdisc);
1000*4882a593Smuzhiyun 
1001*4882a593Smuzhiyun 	module_put(ops->owner);
1002*4882a593Smuzhiyun 	dev_put(qdisc_dev(qdisc));
1003*4882a593Smuzhiyun 
1004*4882a593Smuzhiyun 	trace_qdisc_destroy(qdisc);
1005*4882a593Smuzhiyun 
1006*4882a593Smuzhiyun 	call_rcu(&qdisc->rcu, qdisc_free_cb);
1007*4882a593Smuzhiyun }
1008*4882a593Smuzhiyun 
1009*4882a593Smuzhiyun void qdisc_put(struct Qdisc *qdisc)
1010*4882a593Smuzhiyun {
1011*4882a593Smuzhiyun 	if (!qdisc)
1012*4882a593Smuzhiyun 		return;
1013*4882a593Smuzhiyun 
1014*4882a593Smuzhiyun 	if (qdisc->flags & TCQ_F_BUILTIN ||
1015*4882a593Smuzhiyun 	    !refcount_dec_and_test(&qdisc->refcnt))
1016*4882a593Smuzhiyun 		return;
1017*4882a593Smuzhiyun 
1018*4882a593Smuzhiyun 	qdisc_destroy(qdisc);
1019*4882a593Smuzhiyun }
1020*4882a593Smuzhiyun EXPORT_SYMBOL(qdisc_put);
1021*4882a593Smuzhiyun 
1022*4882a593Smuzhiyun /* Version of qdisc_put() that is called with the rtnl mutex unlocked.
1023*4882a593Smuzhiyun  * Intended as an optimization, this function only takes the rtnl lock if
1024*4882a593Smuzhiyun  * the qdisc reference counter has reached zero.
1025*4882a593Smuzhiyun  */
1026*4882a593Smuzhiyun 
1027*4882a593Smuzhiyun void qdisc_put_unlocked(struct Qdisc *qdisc)
1028*4882a593Smuzhiyun {
1029*4882a593Smuzhiyun 	if (qdisc->flags & TCQ_F_BUILTIN ||
1030*4882a593Smuzhiyun 	    !refcount_dec_and_rtnl_lock(&qdisc->refcnt))
1031*4882a593Smuzhiyun 		return;
1032*4882a593Smuzhiyun 
1033*4882a593Smuzhiyun 	qdisc_destroy(qdisc);
1034*4882a593Smuzhiyun 	rtnl_unlock();
1035*4882a593Smuzhiyun }
1036*4882a593Smuzhiyun EXPORT_SYMBOL(qdisc_put_unlocked);
1037*4882a593Smuzhiyun 
1038*4882a593Smuzhiyun /* Attach toplevel qdisc to device queue. */
1039*4882a593Smuzhiyun struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
1040*4882a593Smuzhiyun 			      struct Qdisc *qdisc)
1041*4882a593Smuzhiyun {
1042*4882a593Smuzhiyun 	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
1043*4882a593Smuzhiyun 	spinlock_t *root_lock;
1044*4882a593Smuzhiyun 
1045*4882a593Smuzhiyun 	root_lock = qdisc_lock(oqdisc);
1046*4882a593Smuzhiyun 	spin_lock_bh(root_lock);
1047*4882a593Smuzhiyun 
1048*4882a593Smuzhiyun 	/* ... and graft new one */
1049*4882a593Smuzhiyun 	if (qdisc == NULL)
1050*4882a593Smuzhiyun 		qdisc = &noop_qdisc;
1051*4882a593Smuzhiyun 	dev_queue->qdisc_sleeping = qdisc;
1052*4882a593Smuzhiyun 	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
1053*4882a593Smuzhiyun 
1054*4882a593Smuzhiyun 	spin_unlock_bh(root_lock);
1055*4882a593Smuzhiyun 
1056*4882a593Smuzhiyun 	return oqdisc;
1057*4882a593Smuzhiyun }
1058*4882a593Smuzhiyun EXPORT_SYMBOL(dev_graft_qdisc);
1059*4882a593Smuzhiyun 
1060*4882a593Smuzhiyun static void shutdown_scheduler_queue(struct net_device *dev,
1061*4882a593Smuzhiyun 				     struct netdev_queue *dev_queue,
1062*4882a593Smuzhiyun 				     void *_qdisc_default)
1063*4882a593Smuzhiyun {
1064*4882a593Smuzhiyun 	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
1065*4882a593Smuzhiyun 	struct Qdisc *qdisc_default = _qdisc_default;
1066*4882a593Smuzhiyun 
1067*4882a593Smuzhiyun 	if (qdisc) {
1068*4882a593Smuzhiyun 		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
1069*4882a593Smuzhiyun 		dev_queue->qdisc_sleeping = qdisc_default;
1070*4882a593Smuzhiyun 
1071*4882a593Smuzhiyun 		qdisc_put(qdisc);
1072*4882a593Smuzhiyun 	}
1073*4882a593Smuzhiyun }
1074*4882a593Smuzhiyun 
1075*4882a593Smuzhiyun static void attach_one_default_qdisc(struct net_device *dev,
1076*4882a593Smuzhiyun 				     struct netdev_queue *dev_queue,
1077*4882a593Smuzhiyun 				     void *_unused)
1078*4882a593Smuzhiyun {
1079*4882a593Smuzhiyun 	struct Qdisc *qdisc;
1080*4882a593Smuzhiyun 	const struct Qdisc_ops *ops = default_qdisc_ops;
1081*4882a593Smuzhiyun 
1082*4882a593Smuzhiyun 	if (dev->priv_flags & IFF_NO_QUEUE)
1083*4882a593Smuzhiyun 		ops = &noqueue_qdisc_ops;
1084*4882a593Smuzhiyun 	else if (dev->type == ARPHRD_CAN)
1085*4882a593Smuzhiyun 		ops = &pfifo_fast_ops;
1086*4882a593Smuzhiyun 
1087*4882a593Smuzhiyun 	qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
1088*4882a593Smuzhiyun 	if (!qdisc)
1089*4882a593Smuzhiyun 		return;
1090*4882a593Smuzhiyun 
1091*4882a593Smuzhiyun 	if (!netif_is_multiqueue(dev))
1092*4882a593Smuzhiyun 		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1093*4882a593Smuzhiyun 	dev_queue->qdisc_sleeping = qdisc;
1094*4882a593Smuzhiyun }
1095*4882a593Smuzhiyun 
1096*4882a593Smuzhiyun static void attach_default_qdiscs(struct net_device *dev)
1097*4882a593Smuzhiyun {
1098*4882a593Smuzhiyun 	struct netdev_queue *txq;
1099*4882a593Smuzhiyun 	struct Qdisc *qdisc;
1100*4882a593Smuzhiyun 
1101*4882a593Smuzhiyun 	txq = netdev_get_tx_queue(dev, 0);
1102*4882a593Smuzhiyun 
1103*4882a593Smuzhiyun 	if (!netif_is_multiqueue(dev) ||
1104*4882a593Smuzhiyun 	    dev->priv_flags & IFF_NO_QUEUE) {
1105*4882a593Smuzhiyun 		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
1106*4882a593Smuzhiyun 		qdisc = txq->qdisc_sleeping;
1107*4882a593Smuzhiyun 		rcu_assign_pointer(dev->qdisc, qdisc);
1108*4882a593Smuzhiyun 		qdisc_refcount_inc(qdisc);
1109*4882a593Smuzhiyun 	} else {
1110*4882a593Smuzhiyun 		qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL);
1111*4882a593Smuzhiyun 		if (qdisc) {
1112*4882a593Smuzhiyun 			rcu_assign_pointer(dev->qdisc, qdisc);
1113*4882a593Smuzhiyun 			qdisc->ops->attach(qdisc);
1114*4882a593Smuzhiyun 		}
1115*4882a593Smuzhiyun 	}
1116*4882a593Smuzhiyun 	qdisc = rtnl_dereference(dev->qdisc);
1117*4882a593Smuzhiyun 
1118*4882a593Smuzhiyun 	/* Detect default qdisc setup/init failed and fallback to "noqueue" */
1119*4882a593Smuzhiyun 	if (qdisc == &noop_qdisc) {
1120*4882a593Smuzhiyun 		netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n",
1121*4882a593Smuzhiyun 			    default_qdisc_ops->id, noqueue_qdisc_ops.id);
1122*4882a593Smuzhiyun 		netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
1123*4882a593Smuzhiyun 		dev->priv_flags |= IFF_NO_QUEUE;
1124*4882a593Smuzhiyun 		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
1125*4882a593Smuzhiyun 		qdisc = txq->qdisc_sleeping;
1126*4882a593Smuzhiyun 		rcu_assign_pointer(dev->qdisc, qdisc);
1127*4882a593Smuzhiyun 		qdisc_refcount_inc(qdisc);
1128*4882a593Smuzhiyun 		dev->priv_flags ^= IFF_NO_QUEUE;
1129*4882a593Smuzhiyun 	}
1130*4882a593Smuzhiyun 
1131*4882a593Smuzhiyun #ifdef CONFIG_NET_SCHED
1132*4882a593Smuzhiyun 	if (qdisc != &noop_qdisc)
1133*4882a593Smuzhiyun 		qdisc_hash_add(qdisc, false);
1134*4882a593Smuzhiyun #endif
1135*4882a593Smuzhiyun }
1136*4882a593Smuzhiyun 
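/* transition_one_qdisc() below publishes qdisc_sleeping as the active
 * dev_queue->qdisc (clearing __QDISC_STATE_DEACTIVATED first for
 * non-builtin qdiscs) and reports whether the tx watchdog is needed.
 */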
1137*4882a593Smuzhiyun static void transition_one_qdisc(struct net_device *dev,
1138*4882a593Smuzhiyun 				 struct netdev_queue *dev_queue,
1139*4882a593Smuzhiyun 				 void *_need_watchdog)
1140*4882a593Smuzhiyun {
1141*4882a593Smuzhiyun 	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
1142*4882a593Smuzhiyun 	int *need_watchdog_p = _need_watchdog;
1143*4882a593Smuzhiyun 
1144*4882a593Smuzhiyun 	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
1145*4882a593Smuzhiyun 		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
1146*4882a593Smuzhiyun 
1147*4882a593Smuzhiyun 	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
1148*4882a593Smuzhiyun 	if (need_watchdog_p) {
1149*4882a593Smuzhiyun 		dev_queue->trans_start = 0;
1150*4882a593Smuzhiyun 		*need_watchdog_p = 1;
1151*4882a593Smuzhiyun 	}
1152*4882a593Smuzhiyun }
1153*4882a593Smuzhiyun 
1154*4882a593Smuzhiyun void dev_activate(struct net_device *dev)
1155*4882a593Smuzhiyun {
1156*4882a593Smuzhiyun 	int need_watchdog;
1157*4882a593Smuzhiyun 
1158*4882a593Smuzhiyun 	/* No queueing discipline is attached to the device;
1159*4882a593Smuzhiyun 	 * create a default one for devices which need queueing,
1160*4882a593Smuzhiyun 	 * and noqueue_qdisc for virtual interfaces.
1161*4882a593Smuzhiyun 	 */
1162*4882a593Smuzhiyun 
1163*4882a593Smuzhiyun 	if (rtnl_dereference(dev->qdisc) == &noop_qdisc)
1164*4882a593Smuzhiyun 		attach_default_qdiscs(dev);
1165*4882a593Smuzhiyun 
1166*4882a593Smuzhiyun 	if (!netif_carrier_ok(dev))
1167*4882a593Smuzhiyun 		/* Delay activation until next carrier-on event */
1168*4882a593Smuzhiyun 		return;
1169*4882a593Smuzhiyun 
1170*4882a593Smuzhiyun 	need_watchdog = 0;
1171*4882a593Smuzhiyun 	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
1172*4882a593Smuzhiyun 	if (dev_ingress_queue(dev))
1173*4882a593Smuzhiyun 		transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
1174*4882a593Smuzhiyun 
1175*4882a593Smuzhiyun 	if (need_watchdog) {
1176*4882a593Smuzhiyun 		netif_trans_update(dev);
1177*4882a593Smuzhiyun 		dev_watchdog_up(dev);
1178*4882a593Smuzhiyun 	}
1179*4882a593Smuzhiyun }
1180*4882a593Smuzhiyun EXPORT_SYMBOL(dev_activate);
1181*4882a593Smuzhiyun 
1182*4882a593Smuzhiyun static void qdisc_deactivate(struct Qdisc *qdisc)
1183*4882a593Smuzhiyun {
1184*4882a593Smuzhiyun 	if (qdisc->flags & TCQ_F_BUILTIN)
1185*4882a593Smuzhiyun 		return;
1186*4882a593Smuzhiyun 
1187*4882a593Smuzhiyun 	set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
1188*4882a593Smuzhiyun }
1189*4882a593Smuzhiyun 
dev_deactivate_queue(struct net_device * dev,struct netdev_queue * dev_queue,void * _qdisc_default)1190*4882a593Smuzhiyun static void dev_deactivate_queue(struct net_device *dev,
1191*4882a593Smuzhiyun 				 struct netdev_queue *dev_queue,
1192*4882a593Smuzhiyun 				 void *_qdisc_default)
1193*4882a593Smuzhiyun {
1194*4882a593Smuzhiyun 	struct Qdisc *qdisc_default = _qdisc_default;
1195*4882a593Smuzhiyun 	struct Qdisc *qdisc;
1196*4882a593Smuzhiyun 
1197*4882a593Smuzhiyun 	qdisc = rtnl_dereference(dev_queue->qdisc);
1198*4882a593Smuzhiyun 	if (qdisc) {
1199*4882a593Smuzhiyun 		qdisc_deactivate(qdisc);
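		/* Publish the caller-supplied default (normally &noop_qdisc)
		 * so that new dev_queue_xmit() calls stop enqueuing to the
		 * qdisc being torn down.
		 */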
1200*4882a593Smuzhiyun 		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
1201*4882a593Smuzhiyun 	}
1202*4882a593Smuzhiyun }
1203*4882a593Smuzhiyun 
dev_reset_queue(struct net_device * dev,struct netdev_queue * dev_queue,void * _unused)1204*4882a593Smuzhiyun static void dev_reset_queue(struct net_device *dev,
1205*4882a593Smuzhiyun 			    struct netdev_queue *dev_queue,
1206*4882a593Smuzhiyun 			    void *_unused)
1207*4882a593Smuzhiyun {
1208*4882a593Smuzhiyun 	struct Qdisc *qdisc;
1209*4882a593Smuzhiyun 	bool nolock;
1210*4882a593Smuzhiyun 
1211*4882a593Smuzhiyun 	qdisc = dev_queue->qdisc_sleeping;
1212*4882a593Smuzhiyun 	if (!qdisc)
1213*4882a593Smuzhiyun 		return;
1214*4882a593Smuzhiyun 
1215*4882a593Smuzhiyun 	nolock = qdisc->flags & TCQ_F_NOLOCK;
1216*4882a593Smuzhiyun 
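	/* A lockless (TCQ_F_NOLOCK) qdisc serializes its datapath with
	 * qdisc->seqlock rather than the qdisc lock, so take the seqlock
	 * too; this keeps a concurrent qdisc_run() from dequeuing while
	 * the queues are purged below.
	 */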
1217*4882a593Smuzhiyun 	if (nolock)
1218*4882a593Smuzhiyun 		spin_lock_bh(&qdisc->seqlock);
1219*4882a593Smuzhiyun 	spin_lock_bh(qdisc_lock(qdisc));
1220*4882a593Smuzhiyun 
1221*4882a593Smuzhiyun 	qdisc_reset(qdisc);
1222*4882a593Smuzhiyun 
1223*4882a593Smuzhiyun 	spin_unlock_bh(qdisc_lock(qdisc));
1224*4882a593Smuzhiyun 	if (nolock) {
1225*4882a593Smuzhiyun 		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
1226*4882a593Smuzhiyun 		spin_unlock_bh(&qdisc->seqlock);
1227*4882a593Smuzhiyun 	}
1228*4882a593Smuzhiyun }
1229*4882a593Smuzhiyun 
some_qdisc_is_busy(struct net_device * dev)1230*4882a593Smuzhiyun static bool some_qdisc_is_busy(struct net_device *dev)
1231*4882a593Smuzhiyun {
1232*4882a593Smuzhiyun 	unsigned int i;
1233*4882a593Smuzhiyun 
1234*4882a593Smuzhiyun 	for (i = 0; i < dev->num_tx_queues; i++) {
1235*4882a593Smuzhiyun 		struct netdev_queue *dev_queue;
1236*4882a593Smuzhiyun 		spinlock_t *root_lock;
1237*4882a593Smuzhiyun 		struct Qdisc *q;
1238*4882a593Smuzhiyun 		int val;
1239*4882a593Smuzhiyun 
1240*4882a593Smuzhiyun 		dev_queue = netdev_get_tx_queue(dev, i);
1241*4882a593Smuzhiyun 		q = dev_queue->qdisc_sleeping;
1242*4882a593Smuzhiyun 
1243*4882a593Smuzhiyun 		root_lock = qdisc_lock(q);
1244*4882a593Smuzhiyun 		spin_lock_bh(root_lock);
1245*4882a593Smuzhiyun 
1246*4882a593Smuzhiyun 		val = (qdisc_is_running(q) ||
1247*4882a593Smuzhiyun 		       test_bit(__QDISC_STATE_SCHED, &q->state));
1248*4882a593Smuzhiyun 
1249*4882a593Smuzhiyun 		spin_unlock_bh(root_lock);
1250*4882a593Smuzhiyun 
1251*4882a593Smuzhiyun 		if (val)
1252*4882a593Smuzhiyun 			return true;
1253*4882a593Smuzhiyun 	}
1254*4882a593Smuzhiyun 	return false;
1255*4882a593Smuzhiyun }
1256*4882a593Smuzhiyun 
1257*4882a593Smuzhiyun /**
1258*4882a593Smuzhiyun  * 	dev_deactivate_many - deactivate transmissions on several devices
1259*4882a593Smuzhiyun  * 	@head: list of devices to deactivate
1260*4882a593Smuzhiyun  *
1261*4882a593Smuzhiyun  *	This function returns only when all outstanding transmissions
1262*4882a593Smuzhiyun  *	have completed, unless all devices are in the dismantle phase.
1263*4882a593Smuzhiyun  */
dev_deactivate_many(struct list_head * head)1264*4882a593Smuzhiyun void dev_deactivate_many(struct list_head *head)
1265*4882a593Smuzhiyun {
1266*4882a593Smuzhiyun 	struct net_device *dev;
1267*4882a593Smuzhiyun 
1268*4882a593Smuzhiyun 	list_for_each_entry(dev, head, close_list) {
1269*4882a593Smuzhiyun 		netdev_for_each_tx_queue(dev, dev_deactivate_queue,
1270*4882a593Smuzhiyun 					 &noop_qdisc);
1271*4882a593Smuzhiyun 		if (dev_ingress_queue(dev))
1272*4882a593Smuzhiyun 			dev_deactivate_queue(dev, dev_ingress_queue(dev),
1273*4882a593Smuzhiyun 					     &noop_qdisc);
1274*4882a593Smuzhiyun 
1275*4882a593Smuzhiyun 		dev_watchdog_down(dev);
1276*4882a593Smuzhiyun 	}
1277*4882a593Smuzhiyun 
1278*4882a593Smuzhiyun 	/* Wait for outstanding qdisc-less dev_queue_xmit calls or
1279*4882a593Smuzhiyun 	 * outstanding qdisc enqueuing calls.
1280*4882a593Smuzhiyun 	 * This is avoided if all devices are in the dismantle phase:
1281*4882a593Smuzhiyun 	 * the caller will call synchronize_net() for us.
1282*4882a593Smuzhiyun 	 */
1283*4882a593Smuzhiyun 	synchronize_net();
1284*4882a593Smuzhiyun 
1285*4882a593Smuzhiyun 	list_for_each_entry(dev, head, close_list) {
1286*4882a593Smuzhiyun 		netdev_for_each_tx_queue(dev, dev_reset_queue, NULL);
1287*4882a593Smuzhiyun 
1288*4882a593Smuzhiyun 		if (dev_ingress_queue(dev))
1289*4882a593Smuzhiyun 			dev_reset_queue(dev, dev_ingress_queue(dev), NULL);
1290*4882a593Smuzhiyun 	}
1291*4882a593Smuzhiyun 
1292*4882a593Smuzhiyun 	/* Wait for outstanding qdisc_run calls. */
1293*4882a593Smuzhiyun 	list_for_each_entry(dev, head, close_list) {
1294*4882a593Smuzhiyun 		while (some_qdisc_is_busy(dev)) {
1295*4882a593Smuzhiyun 			/* wait_event() would avoid this sleep-loop but would
1296*4882a593Smuzhiyun 			 * require expensive checks in the fast paths of packet
1297*4882a593Smuzhiyun 			 * processing which isn't worth it.
1298*4882a593Smuzhiyun 			 * processing, which isn't worth it.
1299*4882a593Smuzhiyun 			schedule_timeout_uninterruptible(1);
1300*4882a593Smuzhiyun 		}
1301*4882a593Smuzhiyun 	}
1302*4882a593Smuzhiyun }
1303*4882a593Smuzhiyun 
dev_deactivate(struct net_device * dev)1304*4882a593Smuzhiyun void dev_deactivate(struct net_device *dev)
1305*4882a593Smuzhiyun {
1306*4882a593Smuzhiyun 	LIST_HEAD(single);
1307*4882a593Smuzhiyun 
1308*4882a593Smuzhiyun 	list_add(&dev->close_list, &single);
1309*4882a593Smuzhiyun 	dev_deactivate_many(&single);
1310*4882a593Smuzhiyun 	list_del(&single);
1311*4882a593Smuzhiyun }
1312*4882a593Smuzhiyun EXPORT_SYMBOL(dev_deactivate);
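
/* Editorial sketch (not compiled here): reconfiguration paths that must
 * quiesce the TX datapath can bracket their work with dev_deactivate()
 * and dev_activate() under RTNL, exactly as dev_qdisc_change_tx_queue_len()
 * below does.  "example_quiesce_reconfig" is a hypothetical name.
 */
static void example_quiesce_reconfig(struct net_device *dev)
{
	ASSERT_RTNL();

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);

	/* ... safely touch qdisc/queue state here ... */

	if (dev->flags & IFF_UP)
		dev_activate(dev);
}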
1313*4882a593Smuzhiyun 
qdisc_change_tx_queue_len(struct net_device * dev,struct netdev_queue * dev_queue)1314*4882a593Smuzhiyun static int qdisc_change_tx_queue_len(struct net_device *dev,
1315*4882a593Smuzhiyun 				     struct netdev_queue *dev_queue)
1316*4882a593Smuzhiyun {
1317*4882a593Smuzhiyun 	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
1318*4882a593Smuzhiyun 	const struct Qdisc_ops *ops = qdisc->ops;
1319*4882a593Smuzhiyun 
1320*4882a593Smuzhiyun 	if (ops->change_tx_queue_len)
1321*4882a593Smuzhiyun 		return ops->change_tx_queue_len(qdisc, dev->tx_queue_len);
1322*4882a593Smuzhiyun 	return 0;
1323*4882a593Smuzhiyun }
1324*4882a593Smuzhiyun 
dev_qdisc_change_tx_queue_len(struct net_device * dev)1325*4882a593Smuzhiyun int dev_qdisc_change_tx_queue_len(struct net_device *dev)
1326*4882a593Smuzhiyun {
1327*4882a593Smuzhiyun 	bool up = dev->flags & IFF_UP;
1328*4882a593Smuzhiyun 	unsigned int i;
1329*4882a593Smuzhiyun 	int ret = 0;
1330*4882a593Smuzhiyun 
1331*4882a593Smuzhiyun 	if (up)
1332*4882a593Smuzhiyun 		dev_deactivate(dev);
1333*4882a593Smuzhiyun 
1334*4882a593Smuzhiyun 	for (i = 0; i < dev->num_tx_queues; i++) {
1335*4882a593Smuzhiyun 		ret = qdisc_change_tx_queue_len(dev, &dev->_tx[i]);
1336*4882a593Smuzhiyun 
1337*4882a593Smuzhiyun 		/* TODO: revert changes on a partial failure */
1338*4882a593Smuzhiyun 		if (ret)
1339*4882a593Smuzhiyun 			break;
1340*4882a593Smuzhiyun 	}
1341*4882a593Smuzhiyun 
1342*4882a593Smuzhiyun 	if (up)
1343*4882a593Smuzhiyun 		dev_activate(dev);
1344*4882a593Smuzhiyun 	return ret;
1345*4882a593Smuzhiyun }
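
/* Editorial sketch (not compiled here): a hypothetical single-band lockless
 * qdisc could honour tx_queue_len changes by resizing its skb_array from the
 * Qdisc_ops::change_tx_queue_len hook invoked by the loop above.  The
 * "example_fifo_*" names are made up for this sketch.
 */
struct example_fifo_priv {
	struct skb_array ring;
};

static int example_fifo_change_tx_queue_len(struct Qdisc *sch,
					    unsigned int new_len)
{
	struct example_fifo_priv *priv = qdisc_priv(sch);

	return skb_array_resize(&priv->ring, new_len, GFP_KERNEL);
}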
1346*4882a593Smuzhiyun 
dev_init_scheduler_queue(struct net_device * dev,struct netdev_queue * dev_queue,void * _qdisc)1347*4882a593Smuzhiyun static void dev_init_scheduler_queue(struct net_device *dev,
1348*4882a593Smuzhiyun 				     struct netdev_queue *dev_queue,
1349*4882a593Smuzhiyun 				     void *_qdisc)
1350*4882a593Smuzhiyun {
1351*4882a593Smuzhiyun 	struct Qdisc *qdisc = _qdisc;
1352*4882a593Smuzhiyun 
1353*4882a593Smuzhiyun 	rcu_assign_pointer(dev_queue->qdisc, qdisc);
1354*4882a593Smuzhiyun 	dev_queue->qdisc_sleeping = qdisc;
1355*4882a593Smuzhiyun }
1356*4882a593Smuzhiyun 
dev_init_scheduler(struct net_device * dev)1357*4882a593Smuzhiyun void dev_init_scheduler(struct net_device *dev)
1358*4882a593Smuzhiyun {
1359*4882a593Smuzhiyun 	rcu_assign_pointer(dev->qdisc, &noop_qdisc);
1360*4882a593Smuzhiyun 	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
1361*4882a593Smuzhiyun 	if (dev_ingress_queue(dev))
1362*4882a593Smuzhiyun 		dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
1363*4882a593Smuzhiyun 
1364*4882a593Smuzhiyun 	timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
1365*4882a593Smuzhiyun }
1366*4882a593Smuzhiyun 
dev_shutdown(struct net_device * dev)1367*4882a593Smuzhiyun void dev_shutdown(struct net_device *dev)
1368*4882a593Smuzhiyun {
1369*4882a593Smuzhiyun 	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
1370*4882a593Smuzhiyun 	if (dev_ingress_queue(dev))
1371*4882a593Smuzhiyun 		shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
1372*4882a593Smuzhiyun 	qdisc_put(rtnl_dereference(dev->qdisc));
1373*4882a593Smuzhiyun 	rcu_assign_pointer(dev->qdisc, &noop_qdisc);
1374*4882a593Smuzhiyun 
1375*4882a593Smuzhiyun 	WARN_ON(timer_pending(&dev->watchdog_timer));
1376*4882a593Smuzhiyun }
1377*4882a593Smuzhiyun 
psched_ratecfg_precompute(struct psched_ratecfg * r,const struct tc_ratespec * conf,u64 rate64)1378*4882a593Smuzhiyun void psched_ratecfg_precompute(struct psched_ratecfg *r,
1379*4882a593Smuzhiyun 			       const struct tc_ratespec *conf,
1380*4882a593Smuzhiyun 			       u64 rate64)
1381*4882a593Smuzhiyun {
1382*4882a593Smuzhiyun 	memset(r, 0, sizeof(*r));
1383*4882a593Smuzhiyun 	r->overhead = conf->overhead;
1384*4882a593Smuzhiyun 	r->mpu = conf->mpu;
1385*4882a593Smuzhiyun 	r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
1386*4882a593Smuzhiyun 	r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
1387*4882a593Smuzhiyun 	r->mult = 1;
1388*4882a593Smuzhiyun 	/*
1389*4882a593Smuzhiyun 	 * The idea is to replace the divide in the fast path with a
1390*4882a593Smuzhiyun 	 * reciprocal divide (a multiply and a shift).
1391*4882a593Smuzhiyun 	 *
1392*4882a593Smuzhiyun 	 * The exact formula would be:
1393*4882a593Smuzhiyun 	 *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
1394*4882a593Smuzhiyun 	 *
1395*4882a593Smuzhiyun 	 * We compute mult/shift to use instead:
1396*4882a593Smuzhiyun 	 *  time_in_ns = (len * mult) >> shift;
1397*4882a593Smuzhiyun 	 *
1398*4882a593Smuzhiyun 	 * We try to get the highest possible mult value for accuracy,
1399*4882a593Smuzhiyun 	 * but have to make sure no overflows will ever happen.
1400*4882a593Smuzhiyun 	 */
1401*4882a593Smuzhiyun 	if (r->rate_bytes_ps > 0) {
1402*4882a593Smuzhiyun 		u64 factor = NSEC_PER_SEC;
1403*4882a593Smuzhiyun 
1404*4882a593Smuzhiyun 		for (;;) {
1405*4882a593Smuzhiyun 			r->mult = div64_u64(factor, r->rate_bytes_ps);
1406*4882a593Smuzhiyun 			if (r->mult & (1U << 31) || factor & (1ULL << 63))
1407*4882a593Smuzhiyun 				break;
1408*4882a593Smuzhiyun 			factor <<= 1;
1409*4882a593Smuzhiyun 			r->shift++;
1410*4882a593Smuzhiyun 		}
1411*4882a593Smuzhiyun 	}
1412*4882a593Smuzhiyun }
1413*4882a593Smuzhiyun EXPORT_SYMBOL(psched_ratecfg_precompute);
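
/* Editorial worked example (not compiled here): at 1 Gbit/s the byte rate
 * is 125,000,000 B/s, so a 1500 byte frame takes
 *   1500 * NSEC_PER_SEC / 125000000 = 12000 ns.
 * The precomputed (mult, shift) pair lets the fast path reach the same
 * result with one multiply and one shift, e.g. through psched_l2t_ns().
 * "example_l2t_ns" is a hypothetical helper.
 */
static u64 example_l2t_ns(u64 rate_bytes_ps, unsigned int len)
{
	struct tc_ratespec spec = { .linklayer = TC_LINKLAYER_ETHERNET };
	struct psched_ratecfg r;

	psched_ratecfg_precompute(&r, &spec, rate_bytes_ps);

	return psched_l2t_ns(&r, len);	/* ~ (len * r.mult) >> r.shift */
}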
1414*4882a593Smuzhiyun 
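/* Intentionally empty: this callback exists only so that the pending
 * call_rcu() on a retired miniq can be flushed with rcu_barrier() in
 * mini_qdisc_pair_swap() before that slot is written again.
 */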
mini_qdisc_rcu_func(struct rcu_head * head)1415*4882a593Smuzhiyun static void mini_qdisc_rcu_func(struct rcu_head *head)
1416*4882a593Smuzhiyun {
1417*4882a593Smuzhiyun }
1418*4882a593Smuzhiyun 
mini_qdisc_pair_swap(struct mini_Qdisc_pair * miniqp,struct tcf_proto * tp_head)1419*4882a593Smuzhiyun void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
1420*4882a593Smuzhiyun 			  struct tcf_proto *tp_head)
1421*4882a593Smuzhiyun {
1422*4882a593Smuzhiyun 	/* Protected with chain0->filter_chain_lock.
1423*4882a593Smuzhiyun 	 * Can't access chain directly because tp_head can be NULL.
1424*4882a593Smuzhiyun 	 */
1425*4882a593Smuzhiyun 	struct mini_Qdisc *miniq_old =
1426*4882a593Smuzhiyun 		rcu_dereference_protected(*miniqp->p_miniq, 1);
1427*4882a593Smuzhiyun 	struct mini_Qdisc *miniq;
1428*4882a593Smuzhiyun 
1429*4882a593Smuzhiyun 	if (!tp_head) {
1430*4882a593Smuzhiyun 		RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
1431*4882a593Smuzhiyun 		/* Wait for any in-flight RCU callback before the pair can be freed. */
1432*4882a593Smuzhiyun 		rcu_barrier();
1433*4882a593Smuzhiyun 		return;
1434*4882a593Smuzhiyun 	}
1435*4882a593Smuzhiyun 
1436*4882a593Smuzhiyun 	miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
1437*4882a593Smuzhiyun 		&miniqp->miniq1 : &miniqp->miniq2;
1438*4882a593Smuzhiyun 
1439*4882a593Smuzhiyun 	/* We need to make sure that readers won't see the miniq
1440*4882a593Smuzhiyun 	 * we are about to modify. So wait until previous call_rcu callback
1441*4882a593Smuzhiyun 	 * is done.
1442*4882a593Smuzhiyun 	 */
1443*4882a593Smuzhiyun 	rcu_barrier();
1444*4882a593Smuzhiyun 	miniq->filter_list = tp_head;
1445*4882a593Smuzhiyun 	rcu_assign_pointer(*miniqp->p_miniq, miniq);
1446*4882a593Smuzhiyun 
1447*4882a593Smuzhiyun 	if (miniq_old)
1448*4882a593Smuzhiyun 		/* This is the counterpart of the rcu barriers above. We need
1449*4882a593Smuzhiyun 		 * to block any potential new user of miniq_old until all
1450*4882a593Smuzhiyun 		 * readers have stopped seeing it.
1451*4882a593Smuzhiyun 		 */
1452*4882a593Smuzhiyun 		call_rcu(&miniq_old->rcu, mini_qdisc_rcu_func);
1453*4882a593Smuzhiyun }
1454*4882a593Smuzhiyun EXPORT_SYMBOL(mini_qdisc_pair_swap);
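
/* Editorial reader-side sketch (not compiled here): the ingress fast path
 * dereferences the published mini_Qdisc under RCU-bh and, thanks to the
 * rcu_barrier()/rcu_assign_pointer() ordering above, never sees a slot that
 * is still being rewritten.  Classification details are elided;
 * "example_has_ingress_filters" is a hypothetical helper.
 */
static bool example_has_ingress_filters(struct mini_Qdisc __rcu **p_miniq)
{
	struct mini_Qdisc *miniq = rcu_dereference_bh(*p_miniq);

	return miniq && miniq->filter_list;
}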
1455*4882a593Smuzhiyun 
mini_qdisc_pair_block_init(struct mini_Qdisc_pair * miniqp,struct tcf_block * block)1456*4882a593Smuzhiyun void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
1457*4882a593Smuzhiyun 				struct tcf_block *block)
1458*4882a593Smuzhiyun {
1459*4882a593Smuzhiyun 	miniqp->miniq1.block = block;
1460*4882a593Smuzhiyun 	miniqp->miniq2.block = block;
1461*4882a593Smuzhiyun }
1462*4882a593Smuzhiyun EXPORT_SYMBOL(mini_qdisc_pair_block_init);
1463*4882a593Smuzhiyun 
mini_qdisc_pair_init(struct mini_Qdisc_pair * miniqp,struct Qdisc * qdisc,struct mini_Qdisc __rcu ** p_miniq)1464*4882a593Smuzhiyun void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
1465*4882a593Smuzhiyun 			  struct mini_Qdisc __rcu **p_miniq)
1466*4882a593Smuzhiyun {
1467*4882a593Smuzhiyun 	miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats;
1468*4882a593Smuzhiyun 	miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
1469*4882a593Smuzhiyun 	miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
1470*4882a593Smuzhiyun 	miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
1471*4882a593Smuzhiyun 	miniqp->p_miniq = p_miniq;
1472*4882a593Smuzhiyun }
1473*4882a593Smuzhiyun EXPORT_SYMBOL(mini_qdisc_pair_init);
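
/* Editorial sketch (not compiled here): an ingress/clsact-style qdisc wires
 * the pair up at init time and lets the tcf_block chain-head callback drive
 * mini_qdisc_pair_swap() whenever the filter chain head changes.  The
 * "example_*" names are made up; the tcf_block_get_ext() plumbing that
 * registers the callback is elided.
 */
struct example_sch_priv {
	struct mini_Qdisc_pair miniqp;
};

static void example_chain_head_change(struct tcf_proto *tp_head, void *priv)
{
	mini_qdisc_pair_swap(priv, tp_head);
}

static void example_ingress_wire(struct Qdisc *sch,
				 struct mini_Qdisc __rcu **p_miniq)
{
	struct example_sch_priv *q = qdisc_priv(sch);

	mini_qdisc_pair_init(&q->miniqp, sch, p_miniq);
	/* A real qdisc would now register example_chain_head_change (with
	 * &q->miniqp as its priv argument) via tcf_block_get_ext().
	 */
}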
1474