// SPDX-License-Identifier: GPL-2.0-only
/* Flow Queue PIE discipline
 *
 * Copyright (C) 2019 Mohit P. Tahiliani <tahiliani@nitk.edu.in>
 * Copyright (C) 2019 Sachin D. Patil <sdp.sachin@gmail.com>
 * Copyright (C) 2019 V. Saicharan <vsaicharan1998@gmail.com>
 * Copyright (C) 2019 Mohit Bhasi <mohitbhasi1998@gmail.com>
 * Copyright (C) 2019 Leslie Monis <lesliemonis@gmail.com>
 * Copyright (C) 2019 Gautam Ramakrishnan <gautamramk@gmail.com>
 */

#include <linux/jhash.h>
#include <linux/sizes.h>
#include <linux/vmalloc.h>
#include <net/pkt_cls.h>
#include <net/pie.h>

/* Flow Queue PIE
 *
 * Principles:
 *   - Packets are classified into flows.
 *   - This is a stochastic model (as we use a hash, several flows might
 *     be hashed to the same slot).
 *   - Each flow has a PIE managed queue.
 *   - Flows are linked onto two (Round Robin) lists,
 *     so that new flows have priority over old ones.
 *   - For a given flow, packets are not reordered.
 *   - Drops during enqueue only.
 *   - ECN capability is off by default.
 *   - ECN threshold (if ECN is enabled) is at 10% by default.
 *   - Uses timestamps to calculate queue delay by default.
 */
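
/* Illustrative configuration via iproute2 (a sketch, not part of this
 * file; the option names assume a tc build with fq_pie support):
 *
 *	tc qdisc add dev eth0 root fq_pie
 *	tc qdisc change dev eth0 root fq_pie flows 1024 limit 10240 \
 *		target 15ms tupdate 15ms ecn ecn_prob 10
 */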

/**
 * struct fq_pie_flow - contains data for each flow
 * @vars: pie vars associated with the flow
 * @deficit: number of remaining byte credits
 * @backlog: size of data in the flow
 * @qlen: number of packets in the flow
 * @flowchain: flowchain for the flow
 * @head: first packet in the flow
 * @tail: last packet in the flow
 */
struct fq_pie_flow {
	struct pie_vars vars;
	s32 deficit;
	u32 backlog;
	u32 qlen;
	struct list_head flowchain;
	struct sk_buff *head;
	struct sk_buff *tail;
};

struct fq_pie_sched_data {
	struct tcf_proto __rcu *filter_list; /* optional external classifier */
	struct tcf_block *block;
	struct fq_pie_flow *flows;
	struct Qdisc *sch;
	struct list_head old_flows;
	struct list_head new_flows;
	struct pie_params p_params;
	u32 ecn_prob;
	u32 flows_cnt;
	u32 quantum;
	u32 memory_limit;
	u32 new_flow_count;
	u32 memory_usage;
	u32 overmemory;
	struct pie_stats stats;
	struct timer_list adapt_timer;
};

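/* Map the skb's 32-bit flow hash onto [0, flows_cnt): reciprocal_scale()
 * computes (hash * flows_cnt) >> 32, which avoids a modulo while spreading
 * flows evenly across the available slots.
 */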
static unsigned int fq_pie_hash(const struct fq_pie_sched_data *q,
				struct sk_buff *skb)
{
	return reciprocal_scale(skb_get_hash(skb), q->flows_cnt);
}

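/* Returns a 1-based flow index: skb->priority is honoured first if it
 * addresses this qdisc, then any attached tc filter, and finally the
 * flow hash; 0 tells the caller to drop the packet.
 */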
static unsigned int fq_pie_classify(struct sk_buff *skb, struct Qdisc *sch,
				    int *qerr)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	struct tcf_proto *filter;
	struct tcf_result res;
	int result;

	if (TC_H_MAJ(skb->priority) == sch->handle &&
	    TC_H_MIN(skb->priority) > 0 &&
	    TC_H_MIN(skb->priority) <= q->flows_cnt)
		return TC_H_MIN(skb->priority);

	filter = rcu_dereference_bh(q->filter_list);
	if (!filter)
		return fq_pie_hash(q, skb) + 1;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tcf_classify(skb, filter, &res, false);
	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
		case TC_ACT_TRAP:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			fallthrough;
		case TC_ACT_SHOT:
			return 0;
		}
#endif
		if (TC_H_MIN(res.classid) <= q->flows_cnt)
			return TC_H_MIN(res.classid);
	}
	return 0;
}

/* add skb to flow queue (tail add) */
static inline void flow_queue_add(struct fq_pie_flow *flow,
				  struct sk_buff *skb)
{
	if (!flow->head)
		flow->head = skb;
	else
		flow->tail->next = skb;
	flow->tail = skb;
	skb->next = NULL;
}

static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
				struct sk_buff **to_free)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	struct fq_pie_flow *sel_flow;
	int ret;
	u8 memory_limited = false;
	u8 enqueue = false;
	u32 pkt_len;
	u32 idx;

	/* Classifies packet into corresponding flow */
	idx = fq_pie_classify(skb, sch, &ret);
	if (idx == 0) {
		if (ret & __NET_XMIT_BYPASS)
			qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return ret;
	}
	idx--;

	sel_flow = &q->flows[idx];
	/* Checks whether adding a new packet would exceed memory limit */
	get_pie_cb(skb)->mem_usage = skb->truesize;
	memory_limited = q->memory_usage > q->memory_limit + skb->truesize;

	/* Checks if the qdisc is full */
	if (unlikely(qdisc_qlen(sch) >= sch->limit)) {
		q->stats.overlimit++;
		goto out;
	} else if (unlikely(memory_limited)) {
		q->overmemory++;
	}

	if (!pie_drop_early(sch, &q->p_params, &sel_flow->vars,
			    sel_flow->backlog, skb->len)) {
		enqueue = true;
	} else if (q->p_params.ecn &&
		   sel_flow->vars.prob <= (MAX_PROB / 100) * q->ecn_prob &&
		   INET_ECN_set_ce(skb)) {
		/* If packet is ecn capable, mark it if drop probability
		 * is lower than the parameter ecn_prob, else drop it.
		 */
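		/* MAX_PROB stands for a drop probability of 1, so
		 * (MAX_PROB / 100) * ecn_prob is that percentage on the
		 * same fixed-point scale (10% by default).
		 */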
		q->stats.ecn_mark++;
		enqueue = true;
	}
	if (enqueue) {
		/* Set enqueue time only when dq_rate_estimator is disabled. */
		if (!q->p_params.dq_rate_estimator)
			pie_set_enqueue_time(skb);

		pkt_len = qdisc_pkt_len(skb);
		q->stats.packets_in++;
		q->memory_usage += skb->truesize;
		sch->qstats.backlog += pkt_len;
		sch->q.qlen++;
		flow_queue_add(sel_flow, skb);
		if (list_empty(&sel_flow->flowchain)) {
			list_add_tail(&sel_flow->flowchain, &q->new_flows);
			q->new_flow_count++;
			sel_flow->deficit = q->quantum;
			sel_flow->qlen = 0;
			sel_flow->backlog = 0;
		}
		sel_flow->qlen++;
		sel_flow->backlog += pkt_len;
		return NET_XMIT_SUCCESS;
	}
out:
	q->stats.dropped++;
	sel_flow->vars.accu_prob = 0;
	__qdisc_drop(skb, to_free);
	qdisc_qstats_drop(sch);
	return NET_XMIT_CN;
}

static const struct nla_policy fq_pie_policy[TCA_FQ_PIE_MAX + 1] = {
	[TCA_FQ_PIE_LIMIT]		= {.type = NLA_U32},
	[TCA_FQ_PIE_FLOWS]		= {.type = NLA_U32},
	[TCA_FQ_PIE_TARGET]		= {.type = NLA_U32},
	[TCA_FQ_PIE_TUPDATE]		= {.type = NLA_U32},
	[TCA_FQ_PIE_ALPHA]		= {.type = NLA_U32},
	[TCA_FQ_PIE_BETA]		= {.type = NLA_U32},
	[TCA_FQ_PIE_QUANTUM]		= {.type = NLA_U32},
	[TCA_FQ_PIE_MEMORY_LIMIT]	= {.type = NLA_U32},
	[TCA_FQ_PIE_ECN_PROB]		= {.type = NLA_U32},
	[TCA_FQ_PIE_ECN]		= {.type = NLA_U32},
	[TCA_FQ_PIE_BYTEMODE]		= {.type = NLA_U32},
	[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]	= {.type = NLA_U32},
};

static inline struct sk_buff *dequeue_head(struct fq_pie_flow *flow)
{
	struct sk_buff *skb = flow->head;

	flow->head = skb->next;
	skb->next = NULL;
	return skb;
}

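/* Deficit round robin across the two lists: new flows are scanned first;
 * a flow whose byte credit is exhausted gets one quantum added and is
 * rotated to the tail of old_flows before being revisited.
 */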
static struct sk_buff *fq_pie_qdisc_dequeue(struct Qdisc *sch)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = NULL;
	struct fq_pie_flow *flow;
	struct list_head *head;
	u32 pkt_len;

begin:
	head = &q->new_flows;
	if (list_empty(head)) {
		head = &q->old_flows;
		if (list_empty(head))
			return NULL;
	}

	flow = list_first_entry(head, struct fq_pie_flow, flowchain);
	/* Flow has exhausted all its credits */
	if (flow->deficit <= 0) {
		flow->deficit += q->quantum;
		list_move_tail(&flow->flowchain, &q->old_flows);
		goto begin;
	}

	if (flow->head) {
		skb = dequeue_head(flow);
		pkt_len = qdisc_pkt_len(skb);
		sch->qstats.backlog -= pkt_len;
		sch->q.qlen--;
		qdisc_bstats_update(sch, skb);
	}

	if (!skb) {
		/* force a pass through old_flows to prevent starvation */
		if (head == &q->new_flows && !list_empty(&q->old_flows))
			list_move_tail(&flow->flowchain, &q->old_flows);
		else
			list_del_init(&flow->flowchain);
		goto begin;
	}

	flow->qlen--;
	flow->deficit -= pkt_len;
	flow->backlog -= pkt_len;
	q->memory_usage -= get_pie_cb(skb)->mem_usage;
	pie_process_dequeue(skb, &q->p_params, &flow->vars, flow->backlog);
	return skb;
}

static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
			 struct netlink_ext_ack *extack)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_FQ_PIE_MAX + 1];
	unsigned int len_dropped = 0;
	unsigned int num_dropped = 0;
	int err;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_FQ_PIE_MAX, opt, fq_pie_policy, extack);
	if (err < 0)
		return err;

	sch_tree_lock(sch);
	if (tb[TCA_FQ_PIE_LIMIT]) {
		u32 limit = nla_get_u32(tb[TCA_FQ_PIE_LIMIT]);

		q->p_params.limit = limit;
		sch->limit = limit;
	}
	if (tb[TCA_FQ_PIE_FLOWS]) {
		if (q->flows) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Number of flows cannot be changed");
			goto flow_error;
		}
		q->flows_cnt = nla_get_u32(tb[TCA_FQ_PIE_FLOWS]);
		if (!q->flows_cnt || q->flows_cnt > 65536) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Number of flows must range in [1..65536]");
			goto flow_error;
		}
	}

	/* convert from microseconds to pschedtime */
	if (tb[TCA_FQ_PIE_TARGET]) {
		/* target is in us */
		u32 target = nla_get_u32(tb[TCA_FQ_PIE_TARGET]);

		/* convert to pschedtime */
		q->p_params.target =
			PSCHED_NS2TICKS((u64)target * NSEC_PER_USEC);
	}

	/* tupdate is in jiffies */
	if (tb[TCA_FQ_PIE_TUPDATE])
		q->p_params.tupdate =
			usecs_to_jiffies(nla_get_u32(tb[TCA_FQ_PIE_TUPDATE]));

	if (tb[TCA_FQ_PIE_ALPHA])
		q->p_params.alpha = nla_get_u32(tb[TCA_FQ_PIE_ALPHA]);

	if (tb[TCA_FQ_PIE_BETA])
		q->p_params.beta = nla_get_u32(tb[TCA_FQ_PIE_BETA]);

	if (tb[TCA_FQ_PIE_QUANTUM])
		q->quantum = nla_get_u32(tb[TCA_FQ_PIE_QUANTUM]);

	if (tb[TCA_FQ_PIE_MEMORY_LIMIT])
		q->memory_limit = nla_get_u32(tb[TCA_FQ_PIE_MEMORY_LIMIT]);

	if (tb[TCA_FQ_PIE_ECN_PROB])
		q->ecn_prob = nla_get_u32(tb[TCA_FQ_PIE_ECN_PROB]);

	if (tb[TCA_FQ_PIE_ECN])
		q->p_params.ecn = nla_get_u32(tb[TCA_FQ_PIE_ECN]);

	if (tb[TCA_FQ_PIE_BYTEMODE])
		q->p_params.bytemode = nla_get_u32(tb[TCA_FQ_PIE_BYTEMODE]);

	if (tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR])
		q->p_params.dq_rate_estimator =
			nla_get_u32(tb[TCA_FQ_PIE_DQ_RATE_ESTIMATOR]);

	/* Drop excess packets if new limit is lower */
	while (sch->q.qlen > sch->limit) {
		struct sk_buff *skb = fq_pie_qdisc_dequeue(sch);

		len_dropped += qdisc_pkt_len(skb);
		num_dropped += 1;
		rtnl_kfree_skbs(skb, skb);
	}
	qdisc_tree_reduce_backlog(sch, num_dropped, len_dropped);

	sch_tree_unlock(sch);
	return 0;

flow_error:
	sch_tree_unlock(sch);
	return -EINVAL;
}

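/* Periodic adaptation: recomputes each flow's PIE drop probability from
 * its current backlog under the root qdisc lock, then re-arms itself to
 * fire again after 'tupdate' jiffies.
 */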
static void fq_pie_timer(struct timer_list *t)
{
	struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock; /* to lock qdisc for probability calculations */
	u32 idx;

	root_lock = qdisc_lock(qdisc_root_sleeping(sch));
	spin_lock(root_lock);

	for (idx = 0; idx < q->flows_cnt; idx++)
		pie_calculate_probability(&q->p_params, &q->flows[idx].vars,
					  q->flows[idx].backlog);

	/* reset the timer to fire after 'tupdate' jiffies. */
	if (q->p_params.tupdate)
		mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate);

	spin_unlock(root_lock);
}

static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt,
		       struct netlink_ext_ack *extack)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	int err;
	u32 idx;

	pie_params_init(&q->p_params);
	sch->limit = 10 * 1024;
	q->p_params.limit = sch->limit;
	q->quantum = psched_mtu(qdisc_dev(sch));
	q->sch = sch;
	q->ecn_prob = 10;
	q->flows_cnt = 1024;
	q->memory_limit = SZ_32M;

	INIT_LIST_HEAD(&q->new_flows);
	INIT_LIST_HEAD(&q->old_flows);
	timer_setup(&q->adapt_timer, fq_pie_timer, 0);

	if (opt) {
		err = fq_pie_change(sch, opt, extack);

		if (err)
			return err;
	}

	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
	if (err)
		goto init_failure;

	q->flows = kvcalloc(q->flows_cnt, sizeof(struct fq_pie_flow),
			    GFP_KERNEL);
	if (!q->flows) {
		err = -ENOMEM;
		goto init_failure;
	}
	for (idx = 0; idx < q->flows_cnt; idx++) {
		struct fq_pie_flow *flow = q->flows + idx;

		INIT_LIST_HEAD(&flow->flowchain);
		pie_vars_init(&flow->vars);
	}

	mod_timer(&q->adapt_timer, jiffies + HZ / 2);

	return 0;

init_failure:
	q->flows_cnt = 0;

	return err;
}

static int fq_pie_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (!opts)
		return -EMSGSIZE;

	/* convert target from pschedtime to us */
	if (nla_put_u32(skb, TCA_FQ_PIE_LIMIT, sch->limit) ||
	    nla_put_u32(skb, TCA_FQ_PIE_FLOWS, q->flows_cnt) ||
	    nla_put_u32(skb, TCA_FQ_PIE_TARGET,
			((u32)PSCHED_TICKS2NS(q->p_params.target)) /
			NSEC_PER_USEC) ||
	    nla_put_u32(skb, TCA_FQ_PIE_TUPDATE,
			jiffies_to_usecs(q->p_params.tupdate)) ||
	    nla_put_u32(skb, TCA_FQ_PIE_ALPHA, q->p_params.alpha) ||
	    nla_put_u32(skb, TCA_FQ_PIE_BETA, q->p_params.beta) ||
	    nla_put_u32(skb, TCA_FQ_PIE_QUANTUM, q->quantum) ||
	    nla_put_u32(skb, TCA_FQ_PIE_MEMORY_LIMIT, q->memory_limit) ||
	    nla_put_u32(skb, TCA_FQ_PIE_ECN_PROB, q->ecn_prob) ||
	    nla_put_u32(skb, TCA_FQ_PIE_ECN, q->p_params.ecn) ||
	    nla_put_u32(skb, TCA_FQ_PIE_BYTEMODE, q->p_params.bytemode) ||
	    nla_put_u32(skb, TCA_FQ_PIE_DQ_RATE_ESTIMATOR,
			q->p_params.dq_rate_estimator))
		goto nla_put_failure;

	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	struct tc_fq_pie_xstats st = {
		.packets_in = q->stats.packets_in,
		.overlimit = q->stats.overlimit,
		.overmemory = q->overmemory,
		.dropped = q->stats.dropped,
		.ecn_mark = q->stats.ecn_mark,
		.new_flow_count = q->new_flow_count,
		.memory_usage = q->memory_usage,
	};
	struct list_head *pos;

	sch_tree_lock(sch);
	list_for_each(pos, &q->new_flows)
		st.new_flows_len++;

	list_for_each(pos, &q->old_flows)
		st.old_flows_len++;
	sch_tree_unlock(sch);

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void fq_pie_reset(struct Qdisc *sch)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);
	u32 idx;

	INIT_LIST_HEAD(&q->new_flows);
	INIT_LIST_HEAD(&q->old_flows);
	for (idx = 0; idx < q->flows_cnt; idx++) {
		struct fq_pie_flow *flow = q->flows + idx;

		/* Removes all packets from flow */
		rtnl_kfree_skbs(flow->head, flow->tail);
		flow->head = NULL;

		INIT_LIST_HEAD(&flow->flowchain);
		pie_vars_init(&flow->vars);
	}
}

static void fq_pie_destroy(struct Qdisc *sch)
{
	struct fq_pie_sched_data *q = qdisc_priv(sch);

	tcf_block_put(q->block);
	q->p_params.tupdate = 0;
	del_timer_sync(&q->adapt_timer);
	kvfree(q->flows);
}

static struct Qdisc_ops fq_pie_qdisc_ops __read_mostly = {
	.id		= "fq_pie",
	.priv_size	= sizeof(struct fq_pie_sched_data),
	.enqueue	= fq_pie_qdisc_enqueue,
	.dequeue	= fq_pie_qdisc_dequeue,
	.peek		= qdisc_peek_dequeued,
	.init		= fq_pie_init,
	.destroy	= fq_pie_destroy,
	.reset		= fq_pie_reset,
	.change		= fq_pie_change,
	.dump		= fq_pie_dump,
	.dump_stats	= fq_pie_dump_stats,
	.owner		= THIS_MODULE,
};

static int __init fq_pie_module_init(void)
{
	return register_qdisc(&fq_pie_qdisc_ops);
}

static void __exit fq_pie_module_exit(void)
{
	unregister_qdisc(&fq_pie_qdisc_ops);
}

module_init(fq_pie_module_init);
module_exit(fq_pie_module_exit);

MODULE_DESCRIPTION("Flow Queue Proportional Integral controller Enhanced (FQ-PIE)");
MODULE_AUTHOR("Mohit P. Tahiliani");
MODULE_LICENSE("GPL");