// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_sfq.c	Stochastic Fairness Queueing discipline.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/siphash.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/red.h>

/* Stochastic Fairness Queuing algorithm.
	=======================================

	Source:
	Paul E. McKenney "Stochastic Fairness Queuing",
	IEEE INFOCOM'90 Proceedings, San Francisco, 1990.

	Paul E. McKenney "Stochastic Fairness Queuing",
	"Interworking: Research and Experience", v.2, 1991, p.113-131.


	See also:
	M. Shreedhar and George Varghese "Efficient Fair
	Queuing using Deficit Round Robin", Proc. SIGCOMM 95.


	This is not the thing that is usually called (W)FQ nowadays.
	It does not use any timestamp mechanism, but instead
	processes queues in round-robin order.

	ADVANTAGE:

	- It is very cheap. Both CPU and memory requirements are minimal.

	DRAWBACKS:

	- "Stochastic" -> It is not 100% fair.
	When hash collisions occur, several flows are considered as one.

	- "Round-robin" -> It introduces larger delays than virtual clock
	based schemes, and should not be used for isolating interactive
	traffic from non-interactive. This means that this scheduler
	should be used as a leaf of CBQ or P3, which put interactive
	traffic into the higher priority band.

	We still need true WFQ for the top level CSZ, but using WFQ
	for best effort traffic is absolutely pointless:
	SFQ is superior for this purpose.

	IMPLEMENTATION:
	This implementation limits:
	- maximal queue length per flow to 127 packets,
	- max mtu to 2^18-1,
	- max 65408 flows,
	- number of hash buckets to 65536.

	It is easy to increase these values, but not in flight.  */
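
/* Illustrative usage sketch (not part of this file): SFQ is normally
 * attached with the iproute2 'tc' front end, e.g.
 *
 *	tc qdisc add dev eth0 root sfq perturb 10
 *
 * where 'perturb 10' re-keys the flow hash every 10 seconds (see
 * sfq_perturbation() below), so that two flows that happen to collide
 * in one hash bucket do not stay paired forever.
 */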

#define SFQ_MAX_DEPTH		127 /* max number of packets per flow */
#define SFQ_DEFAULT_FLOWS	128
#define SFQ_MAX_FLOWS		(0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
#define SFQ_EMPTY_SLOT		0xffff
#define SFQ_DEFAULT_HASH_DIVISOR 1024

/* We use 16 bits to store allot, and want to handle packets up to 64K.
 * Scale allot by 8 (1<<3) so that no overflow occurs.
 */
#define SFQ_ALLOT_SHIFT		3
#define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
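
/* Worked example (illustrative): with SFQ_ALLOT_SHIFT == 3, a maximal
 * 64K packet scales to DIV_ROUND_UP(65535, 8) = 8192 allot units, well
 * inside the positive range of the signed 'short' allot field, while a
 * common 1514-byte Ethernet frame costs DIV_ROUND_UP(1514, 8) = 190
 * units.
 */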

/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
typedef u16 sfq_index;

/*
 * We don't use pointers to save space.
 * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array
 * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
 * are 'pointers' to dep[] array
 */
struct sfq_head {
	sfq_index	next;
	sfq_index	prev;
};

struct sfq_slot {
	struct sk_buff	*skblist_next;
	struct sk_buff	*skblist_prev;
	sfq_index	qlen; /* number of skbs in skblist */
	sfq_index	next; /* next slot in sfq RR chain */
	struct sfq_head dep; /* anchor in dep[] chains */
	unsigned short	hash; /* hash value (index in ht[]) */
	short		allot; /* credit for this slot */

	unsigned int	backlog;
	struct red_vars vars;
};

struct sfq_sched_data {
/* frequently used fields */
	int		limit;		/* limit of total number of packets in this qdisc */
	unsigned int	divisor;	/* number of slots in hash table */
	u8		headdrop;
	u8		maxdepth;	/* limit of packets per flow */

	siphash_key_t	perturbation;
	u8		cur_depth;	/* depth of longest slot */
	u8		flags;
	unsigned short	scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
	struct tcf_proto __rcu *filter_list;
	struct tcf_block *block;
	sfq_index	*ht;		/* Hash table ('divisor' slots) */
	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */

	struct red_parms *red_parms;
	struct tc_sfqred_stats stats;
	struct sfq_slot *tail;		/* current slot in round */

	struct sfq_head	dep[SFQ_MAX_DEPTH + 1];
	/* Linked lists of slots, indexed by depth
	 * dep[0] : list of unused flows
	 * dep[1] : list of flows with 1 packet
	 * dep[X] : list of flows with X packets
	 */

	unsigned int	maxflows;	/* number of flows in flows array */
	int		perturb_period;
	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
	struct timer_list perturb_timer;
	struct Qdisc	*sch;
};

/*
 * sfq_head are either in a sfq_slot or in dep[] array
 */
static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
{
	if (val < SFQ_MAX_FLOWS)
		return &q->slots[val].dep;
	return &q->dep[val - SFQ_MAX_FLOWS];
}
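
/* Illustrative example of the index encoding above: with
 * SFQ_MAX_FLOWS == 65408, sfq_dep_head(q, 5) returns &q->slots[5].dep
 * (a real flow), while sfq_dep_head(q, 65408 + 3) returns &q->dep[3],
 * the head of the list of flows currently holding exactly 3 packets.
 */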

static unsigned int sfq_hash(const struct sfq_sched_data *q,
			     const struct sk_buff *skb)
{
	return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1);
}
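
/* Note: the '& (q->divisor - 1)' mask above is only a valid modulo
 * because sfq_change() rejects any divisor that is not a power of two;
 * e.g. divisor 1024 gives mask 0x3ff.
 */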

static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
				 int *qerr)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct tcf_result res;
	struct tcf_proto *fl;
	int result;

	if (TC_H_MAJ(skb->priority) == sch->handle &&
	    TC_H_MIN(skb->priority) > 0 &&
	    TC_H_MIN(skb->priority) <= q->divisor)
		return TC_H_MIN(skb->priority);

	fl = rcu_dereference_bh(q->filter_list);
	if (!fl)
		return sfq_hash(q, skb) + 1;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tcf_classify(skb, fl, &res, false);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
		case TC_ACT_TRAP:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			fallthrough;
		case TC_ACT_SHOT:
			return 0;
		}
#endif
		if (TC_H_MIN(res.classid) <= q->divisor)
			return TC_H_MIN(res.classid);
	}
	return 0;
}

/*
 * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
 */
static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	struct sfq_slot *slot = &q->slots[x];
	int qlen = slot->qlen;

	p = qlen + SFQ_MAX_FLOWS;
	n = q->dep[qlen].next;

	slot->dep.next = n;
	slot->dep.prev = p;

	q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */
	sfq_dep_head(q, n)->prev = x;
}

#define sfq_unlink(q, x, n, p)			\
	do {					\
		n = q->slots[x].dep.next;	\
		p = q->slots[x].dep.prev;	\
		sfq_dep_head(q, p)->next = n;	\
		sfq_dep_head(q, n)->prev = p;	\
	} while (0)
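
/* Illustrative trace of the helpers above and below: when flow x grows
 * from 2 to 3 queued packets, sfq_inc() first sfq_unlink()s x from the
 * dep[2] ring, bumps qlen, then sfq_link() splices x at the head of
 * the dep[3] ring. dep[] therefore always partitions the flows by
 * exact queue depth, which is what lets sfq_drop() find a longest
 * flow in O(1) via q->dep[q->cur_depth].next.
 */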


static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	int d;

	sfq_unlink(q, x, n, p);

	d = q->slots[x].qlen--;
	if (n == p && q->cur_depth == d)
		q->cur_depth--;
	sfq_link(q, x);
}

static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
{
	sfq_index p, n;
	int d;

	sfq_unlink(q, x, n, p);

	d = ++q->slots[x].qlen;
	if (q->cur_depth < d)
		q->cur_depth = d;
	sfq_link(q, x);
}

/* helper functions: might be changed when/if skb uses a standard list_head */

/* remove one skb from tail of slot queue */
static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
{
	struct sk_buff *skb = slot->skblist_prev;

	slot->skblist_prev = skb->prev;
	skb->prev->next = (struct sk_buff *)slot;
	skb->next = skb->prev = NULL;
	return skb;
}

/* remove one skb from head of slot queue */
static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
{
	struct sk_buff *skb = slot->skblist_next;

	slot->skblist_next = skb->next;
	skb->next->prev = (struct sk_buff *)slot;
	skb->next = skb->prev = NULL;
	return skb;
}

static inline void slot_queue_init(struct sfq_slot *slot)
{
	memset(slot, 0, sizeof(*slot));
	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
}

/* add skb to slot queue (tail add) */
static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
{
	skb->prev = slot->skblist_prev;
	skb->next = (struct sk_buff *)slot;
	slot->skblist_prev->next = skb;
	slot->skblist_prev = skb;
}
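
/* The (struct sk_buff *)slot casts above rely on skblist_next and
 * skblist_prev being the first two members of struct sfq_slot,
 * mirroring next/prev at the start of struct sk_buff: the slot itself
 * acts as the list sentinel, so an empty queue is simply
 * slot->skblist_next == (struct sk_buff *)slot.
 */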

static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	sfq_index x, d = q->cur_depth;
	struct sk_buff *skb;
	unsigned int len;
	struct sfq_slot *slot;

	/* Queue is full! Find the longest slot and drop tail packet from it */
	if (d > 1) {
		x = q->dep[d].next;
		slot = &q->slots[x];
drop:
		skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
		len = qdisc_pkt_len(skb);
		slot->backlog -= len;
		sfq_dec(q, x);
		sch->q.qlen--;
		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_drop(skb, sch, to_free);
		return len;
	}

	if (d == 1) {
		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
		x = q->tail->next;
		slot = &q->slots[x];
		q->tail->next = slot->next;
		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
		goto drop;
	}

	return 0;
}

/* Is the ECN parameter configured? */
static int sfq_prob_mark(const struct sfq_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

/* Should packets over the max threshold just be marked? */
static int sfq_hard_mark(const struct sfq_sched_data *q)
{
	return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
}

static int sfq_headdrop(const struct sfq_sched_data *q)
{
	return q->headdrop;
}
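
/* Summary of the RED/ECN flag combinations tested above:
 *
 *	flags				over min thresh		over max thresh
 *	(none)				probabilistic drop	forced drop
 *	TC_RED_ECN			probabilistic mark	forced mark
 *	TC_RED_ECN|TC_RED_HARDDROP	probabilistic mark	forced drop
 *
 * Marking falls back to dropping when the packet cannot carry ECN
 * (INET_ECN_set_ce() fails in sfq_enqueue() below).
 */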

static int
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned int hash, dropped;
	sfq_index x, qlen;
	struct sfq_slot *slot;
	int ret;
	struct sk_buff *head;
	int delta;

	hash = sfq_classify(skb, sch, &ret);
	if (hash == 0) {
		if (ret & __NET_XMIT_BYPASS)
			qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return ret;
	}
	hash--;

	x = q->ht[hash];
	slot = &q->slots[x];
	if (x == SFQ_EMPTY_SLOT) {
		x = q->dep[0].next; /* get a free slot */
		if (x >= SFQ_MAX_FLOWS)
			return qdisc_drop(skb, sch, to_free);
		q->ht[hash] = x;
		slot = &q->slots[x];
		slot->hash = hash;
		slot->backlog = 0; /* should already be 0 anyway... */
		red_set_vars(&slot->vars);
		goto enqueue;
	}
	if (q->red_parms) {
		slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
							&slot->vars,
							slot->backlog);
		switch (red_action(q->red_parms,
				   &slot->vars,
				   slot->vars.qavg)) {
		case RED_DONT_MARK:
			break;

		case RED_PROB_MARK:
			qdisc_qstats_overlimit(sch);
			if (sfq_prob_mark(q)) {
				/* We know we have at least one packet in queue */
				if (sfq_headdrop(q) &&
				    INET_ECN_set_ce(slot->skblist_next)) {
					q->stats.prob_mark_head++;
					break;
				}
				if (INET_ECN_set_ce(skb)) {
					q->stats.prob_mark++;
					break;
				}
			}
			q->stats.prob_drop++;
			goto congestion_drop;

		case RED_HARD_MARK:
			qdisc_qstats_overlimit(sch);
			if (sfq_hard_mark(q)) {
				/* We know we have at least one packet in queue */
				if (sfq_headdrop(q) &&
				    INET_ECN_set_ce(slot->skblist_next)) {
					q->stats.forced_mark_head++;
					break;
				}
				if (INET_ECN_set_ce(skb)) {
					q->stats.forced_mark++;
					break;
				}
			}
			q->stats.forced_drop++;
			goto congestion_drop;
		}
	}

	if (slot->qlen >= q->maxdepth) {
congestion_drop:
		if (!sfq_headdrop(q))
			return qdisc_drop(skb, sch, to_free);

		/* We know we have at least one packet in queue */
		head = slot_dequeue_head(slot);
		delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
		sch->qstats.backlog -= delta;
		slot->backlog -= delta;
		qdisc_drop(head, sch, to_free);

		slot_queue_add(slot, skb);
		qdisc_tree_reduce_backlog(sch, 0, delta);
		return NET_XMIT_CN;
	}

enqueue:
	qdisc_qstats_backlog_inc(sch, skb);
	slot->backlog += qdisc_pkt_len(skb);
	slot_queue_add(slot, skb);
	sfq_inc(q, x);
	if (slot->qlen == 1) {		/* The flow is new */
		if (q->tail == NULL) {	/* It is the first flow */
			slot->next = x;
		} else {
			slot->next = q->tail->next;
			q->tail->next = x;
		}
		/* We put this flow at the end of our flow list.
		 * This might sound unfair for a new flow to wait after old ones,
		 * but we could end up servicing new flows only, and freeze old ones.
		 */
		q->tail = slot;
		/* We could use a bigger initial quantum for new flows */
		slot->allot = q->scaled_quantum;
	}
	if (++sch->q.qlen <= q->limit)
		return NET_XMIT_SUCCESS;

	qlen = slot->qlen;
	dropped = sfq_drop(sch, to_free);
	/* Return Congestion Notification only if we dropped a packet
	 * from this flow.
	 */
	if (qlen != slot->qlen) {
		qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb));
		return NET_XMIT_CN;
	}

	/* As we dropped a packet, better let upper stack know this */
	qdisc_tree_reduce_backlog(sch, 1, dropped);
	return NET_XMIT_SUCCESS;
}

static struct sk_buff *
sfq_dequeue(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	sfq_index a, next_a;
	struct sfq_slot *slot;

	/* No active slots */
	if (q->tail == NULL)
		return NULL;

next_slot:
	a = q->tail->next;
	slot = &q->slots[a];
	if (slot->allot <= 0) {
		q->tail = slot;
		slot->allot += q->scaled_quantum;
		goto next_slot;
	}
	skb = slot_dequeue_head(slot);
	sfq_dec(q, a);
	qdisc_bstats_update(sch, skb);
	sch->q.qlen--;
	qdisc_qstats_backlog_dec(sch, skb);
	slot->backlog -= qdisc_pkt_len(skb);
	/* Is the slot empty? */
	if (slot->qlen == 0) {
		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
		next_a = slot->next;
		if (a == next_a) {
			q->tail = NULL; /* no more active slots */
			return skb;
		}
		q->tail->next = next_a;
	} else {
		slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
	}
	return skb;
}
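
/* Worked example of the deficit round-robin accounting above, assuming
 * quantum 1514 (scaled_quantum 190): a flow starts a round with
 * allot 190; dequeuing one 1514-byte packet costs 190 units, driving
 * allot to 0, so the next visit skips the flow, moves q->tail past it
 * and credits it another 190 units. Small packets (e.g. a 64-byte ACK,
 * 8 units) let a flow send many packets per round, keeping the
 * byte-rate shares of competing flows roughly equal.
 */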

static void
sfq_reset(struct Qdisc *sch)
{
	struct sk_buff *skb;

	while ((skb = sfq_dequeue(sch)) != NULL)
		rtnl_kfree_skbs(skb, skb);
}

/*
 * When q->perturbation is changed, we rehash all queued skbs
 * to avoid OOO (Out Of Order) effects.
 * We don't use sfq_dequeue()/sfq_enqueue() because we don't want to change
 * counters.
 */
static void sfq_rehash(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	int i;
	struct sfq_slot *slot;
	struct sk_buff_head list;
	int dropped = 0;
	unsigned int drop_len = 0;

	__skb_queue_head_init(&list);

	for (i = 0; i < q->maxflows; i++) {
		slot = &q->slots[i];
		if (!slot->qlen)
			continue;
		while (slot->qlen) {
			skb = slot_dequeue_head(slot);
			sfq_dec(q, i);
			__skb_queue_tail(&list, skb);
		}
		slot->backlog = 0;
		red_set_vars(&slot->vars);
		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
	}
	q->tail = NULL;

	while ((skb = __skb_dequeue(&list)) != NULL) {
		unsigned int hash = sfq_hash(q, skb);
		sfq_index x = q->ht[hash];

		slot = &q->slots[x];
		if (x == SFQ_EMPTY_SLOT) {
			x = q->dep[0].next; /* get a free slot */
			if (x >= SFQ_MAX_FLOWS) {
drop:
				qdisc_qstats_backlog_dec(sch, skb);
				drop_len += qdisc_pkt_len(skb);
				kfree_skb(skb);
				dropped++;
				continue;
			}
			q->ht[hash] = x;
			slot = &q->slots[x];
			slot->hash = hash;
		}
		if (slot->qlen >= q->maxdepth)
			goto drop;
		slot_queue_add(slot, skb);
		if (q->red_parms)
			slot->vars.qavg = red_calc_qavg(q->red_parms,
							&slot->vars,
							slot->backlog);
		slot->backlog += qdisc_pkt_len(skb);
		sfq_inc(q, x);
		if (slot->qlen == 1) {		/* The flow is new */
			if (q->tail == NULL) {	/* It is the first flow */
				slot->next = x;
			} else {
				slot->next = q->tail->next;
				q->tail->next = x;
			}
			q->tail = slot;
			slot->allot = q->scaled_quantum;
		}
	}
	sch->q.qlen -= dropped;
	qdisc_tree_reduce_backlog(sch, dropped, drop_len);
}

static void sfq_perturbation(struct timer_list *t)
{
	struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
	siphash_key_t nkey;

	get_random_bytes(&nkey, sizeof(nkey));
	spin_lock(root_lock);
	q->perturbation = nkey;
	if (!q->filter_list && q->tail)
		sfq_rehash(sch);
	spin_unlock(root_lock);

	if (q->perturb_period)
		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
}

static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct tc_sfq_qopt *ctl = nla_data(opt);
	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
	unsigned int qlen, dropped = 0;
	struct red_parms *p = NULL;
	struct sk_buff *to_free = NULL;
	struct sk_buff *tail = NULL;

	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
		return -EINVAL;
	if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
		ctl_v1 = nla_data(opt);
	if (ctl->divisor &&
	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
		return -EINVAL;

	/* slot->allot is a short, make sure quantum is not too big. */
	if (ctl->quantum) {
		unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum);

		if (scaled <= 0 || scaled > SHRT_MAX)
			return -EINVAL;
	}

	if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
					ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
		return -EINVAL;
	if (ctl_v1 && ctl_v1->qth_min) {
		p = kmalloc(sizeof(*p), GFP_KERNEL);
		if (!p)
			return -ENOMEM;
	}
	sch_tree_lock(sch);
	if (ctl->quantum) {
		q->quantum = ctl->quantum;
		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
	}
	q->perturb_period = ctl->perturb_period * HZ;
	if (ctl->flows)
		q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
	if (ctl->divisor) {
		q->divisor = ctl->divisor;
		q->maxflows = min_t(u32, q->maxflows, q->divisor);
	}
	if (ctl_v1) {
		if (ctl_v1->depth)
			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
		if (p) {
			swap(q->red_parms, p);
			red_set_parms(q->red_parms,
				      ctl_v1->qth_min, ctl_v1->qth_max,
				      ctl_v1->Wlog,
				      ctl_v1->Plog, ctl_v1->Scell_log,
				      NULL,
				      ctl_v1->max_P);
		}
		q->flags = ctl_v1->flags;
		q->headdrop = ctl_v1->headdrop;
	}
	if (ctl->limit) {
		q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
		q->maxflows = min_t(u32, q->maxflows, q->limit);
	}

	qlen = sch->q.qlen;
	while (sch->q.qlen > q->limit) {
		dropped += sfq_drop(sch, &to_free);
		if (!tail)
			tail = to_free;
	}

	rtnl_kfree_skbs(to_free, tail);
	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);

	del_timer(&q->perturb_timer);
	if (q->perturb_period) {
		mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
		get_random_bytes(&q->perturbation, sizeof(q->perturbation));
	}
	sch_tree_unlock(sch);
	kfree(p);
	return 0;
}
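
/* Illustrative parameter change (iproute2 syntax, only a sketch):
 *
 *	tc qdisc change dev eth0 root sfq limit 64 depth 32 divisor 2048
 *
 * maps to ctl->limit = 64, ctl_v1->depth = 32 and ctl->divisor = 2048
 * above; the divisor passes the is_power_of_2() check, and the limit
 * is clamped to maxdepth * maxflows if it is larger.
 */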

static void *sfq_alloc(size_t sz)
{
	return kvmalloc(sz, GFP_KERNEL);
}

static void sfq_free(void *addr)
{
	kvfree(addr);
}

static void sfq_destroy(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);

	tcf_block_put(q->block);
	q->perturb_period = 0;
	del_timer_sync(&q->perturb_timer);
	sfq_free(q->ht);
	sfq_free(q->slots);
	kfree(q->red_parms);
}

static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	int i;
	int err;

	q->sch = sch;
	timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);

	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
	if (err)
		return err;

	for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
		q->dep[i].next = i + SFQ_MAX_FLOWS;
		q->dep[i].prev = i + SFQ_MAX_FLOWS;
	}

	q->limit = SFQ_MAX_DEPTH;
	q->maxdepth = SFQ_MAX_DEPTH;
	q->cur_depth = 0;
	q->tail = NULL;
	q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
	q->maxflows = SFQ_DEFAULT_FLOWS;
	q->quantum = psched_mtu(qdisc_dev(sch));
	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
	q->perturb_period = 0;
	get_random_bytes(&q->perturbation, sizeof(q->perturbation));

	if (opt) {
		int err = sfq_change(sch, opt);
		if (err)
			return err;
	}

	q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
	q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
	if (!q->ht || !q->slots) {
		/* Note: sfq_destroy() will be called by our caller */
		return -ENOMEM;
	}

	for (i = 0; i < q->divisor; i++)
		q->ht[i] = SFQ_EMPTY_SLOT;

	for (i = 0; i < q->maxflows; i++) {
		slot_queue_init(&q->slots[i]);
		sfq_link(q, i);
	}
	if (q->limit >= 1)
		sch->flags |= TCQ_F_CAN_BYPASS;
	else
		sch->flags &= ~TCQ_F_CAN_BYPASS;
	return 0;
}

static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb_tail_pointer(skb);
	struct tc_sfq_qopt_v1 opt;
	struct red_parms *p = q->red_parms;

	memset(&opt, 0, sizeof(opt));
	opt.v0.quantum	= q->quantum;
	opt.v0.perturb_period = q->perturb_period / HZ;
	opt.v0.limit	= q->limit;
	opt.v0.divisor	= q->divisor;
	opt.v0.flows	= q->maxflows;
	opt.depth	= q->maxdepth;
	opt.headdrop	= q->headdrop;

	if (p) {
		opt.qth_min	= p->qth_min >> p->Wlog;
		opt.qth_max	= p->qth_max >> p->Wlog;
		opt.Wlog	= p->Wlog;
		opt.Plog	= p->Plog;
		opt.Scell_log	= p->Scell_log;
		opt.max_P	= p->max_P;
	}
	memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
	opt.flags	= q->flags;

	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
		goto nla_put_failure;

	return skb->len;

nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
{
	return NULL;
}

static unsigned long sfq_find(struct Qdisc *sch, u32 classid)
{
	return 0;
}

static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
			      u32 classid)
{
	return 0;
}

static void sfq_unbind(struct Qdisc *q, unsigned long cl)
{
}

static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl,
				       struct netlink_ext_ack *extack)
{
	struct sfq_sched_data *q = qdisc_priv(sch);

	if (cl)
		return NULL;
	return q->block;
}

static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	tcm->tcm_handle |= TC_H_MIN(cl);
	return 0;
}

static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
				struct gnet_dump *d)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	sfq_index idx = q->ht[cl - 1];
	struct gnet_stats_queue qs = { 0 };
	struct tc_sfq_xstats xstats = { 0 };

	if (idx != SFQ_EMPTY_SLOT) {
		const struct sfq_slot *slot = &q->slots[idx];

		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
		qs.qlen = slot->qlen;
		qs.backlog = slot->backlog;
	}
	if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
		return -1;
	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}
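
/* Note on class numbering: sfq_classify() returns hash bucket + 1, so
 * class minor N maps to hash bucket ht[N - 1]; that is why the stats
 * lookup above indexes q->ht[cl - 1], and why the un-scaled credit is
 * reported as slot->allot << SFQ_ALLOT_SHIFT.
 */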

static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->divisor; i++) {
		if (q->ht[i] == SFQ_EMPTY_SLOT ||
		    arg->count < arg->skip) {
			arg->count++;
			continue;
		}
		if (arg->fn(sch, i + 1, arg) < 0) {
			arg->stop = 1;
			break;
		}
		arg->count++;
	}
}

static const struct Qdisc_class_ops sfq_class_ops = {
	.leaf		=	sfq_leaf,
	.find		=	sfq_find,
	.tcf_block	=	sfq_tcf_block,
	.bind_tcf	=	sfq_bind,
	.unbind_tcf	=	sfq_unbind,
	.dump		=	sfq_dump_class,
	.dump_stats	=	sfq_dump_class_stats,
	.walk		=	sfq_walk,
};

static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
	.cl_ops		=	&sfq_class_ops,
	.id		=	"sfq",
	.priv_size	=	sizeof(struct sfq_sched_data),
	.enqueue	=	sfq_enqueue,
	.dequeue	=	sfq_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.init		=	sfq_init,
	.reset		=	sfq_reset,
	.destroy	=	sfq_destroy,
	.change		=	NULL,
	.dump		=	sfq_dump,
	.owner		=	THIS_MODULE,
};

static int __init sfq_module_init(void)
{
	return register_qdisc(&sfq_qdisc_ops);
}
static void __exit sfq_module_exit(void)
{
	unregister_qdisc(&sfq_qdisc_ops);
}
module_init(sfq_module_init)
module_exit(sfq_module_exit)
MODULE_LICENSE("GPL");