/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2019 Mellanox Technologies. */
3*4882a593Smuzhiyun
4*4882a593Smuzhiyun #ifndef DIM_H
5*4882a593Smuzhiyun #define DIM_H
6*4882a593Smuzhiyun
7*4882a593Smuzhiyun #include <linux/bits.h>
8*4882a593Smuzhiyun #include <linux/kernel.h>
9*4882a593Smuzhiyun #include <linux/module.h>
10*4882a593Smuzhiyun #include <linux/types.h>
11*4882a593Smuzhiyun #include <linux/workqueue.h>
12*4882a593Smuzhiyun
/*
 * Number of events between DIM iterations.
 * This moderates how often the algorithm itself is run.
 */
#define DIM_NEVENTS 64
18*4882a593Smuzhiyun
/*
 * Does the difference between two values justify taking an action?
 * A difference of more than 10% of @ref is considered significant.
 * Evaluates to false when @ref is zero, which also guards the division.
 * Note: @ref is expanded more than once, so it must be free of side effects.
 */
#define IS_SIGNIFICANT_DIFF(val, ref) \
	((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10))
25*4882a593Smuzhiyun
/*
 * Calculate the gap between two counter values.
 * The result is masked to the low @bits bits, which takes counter
 * wrap-around and variable counter size into consideration.
 */
#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) \
		& (BIT_ULL(bits) - 1))
32*4882a593Smuzhiyun
33*4882a593Smuzhiyun /**
34*4882a593Smuzhiyun * struct dim_cq_moder - Structure for CQ moderation values.
35*4882a593Smuzhiyun * Used for communications between DIM and its consumer.
36*4882a593Smuzhiyun *
37*4882a593Smuzhiyun * @usec: CQ timer suggestion (by DIM)
38*4882a593Smuzhiyun * @pkts: CQ packet counter suggestion (by DIM)
39*4882a593Smuzhiyun * @comps: Completion counter
40*4882a593Smuzhiyun * @cq_period_mode: CQ period count mode (from CQE/EQE)
41*4882a593Smuzhiyun */
42*4882a593Smuzhiyun struct dim_cq_moder {
43*4882a593Smuzhiyun u16 usec;
44*4882a593Smuzhiyun u16 pkts;
45*4882a593Smuzhiyun u16 comps;
46*4882a593Smuzhiyun u8 cq_period_mode;
47*4882a593Smuzhiyun };
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun /**
50*4882a593Smuzhiyun * struct dim_sample - Structure for DIM sample data.
51*4882a593Smuzhiyun * Used for communications between DIM and its consumer.
52*4882a593Smuzhiyun *
53*4882a593Smuzhiyun * @time: Sample timestamp
54*4882a593Smuzhiyun * @pkt_ctr: Number of packets
55*4882a593Smuzhiyun * @byte_ctr: Number of bytes
56*4882a593Smuzhiyun * @event_ctr: Number of events
57*4882a593Smuzhiyun * @comp_ctr: Current completion counter
58*4882a593Smuzhiyun */
59*4882a593Smuzhiyun struct dim_sample {
60*4882a593Smuzhiyun ktime_t time;
61*4882a593Smuzhiyun u32 pkt_ctr;
62*4882a593Smuzhiyun u32 byte_ctr;
63*4882a593Smuzhiyun u16 event_ctr;
64*4882a593Smuzhiyun u32 comp_ctr;
65*4882a593Smuzhiyun };
66*4882a593Smuzhiyun
/**
 * struct dim_stats - Structure for DIM stats.
 * Used for holding current measured rates.
 *
 * @ppms: Packets per msec
 * @bpms: Bytes per msec
 * @epms: Events per msec
 * @cpms: Completions per msec
 * @cpe_ratio: Ratio of completions to events
 */
struct dim_stats {
	int ppms;
	int bpms;
	int epms;
	int cpms;
	int cpe_ratio;
};
84*4882a593Smuzhiyun
85*4882a593Smuzhiyun /**
86*4882a593Smuzhiyun * struct dim - Main structure for dynamic interrupt moderation (DIM).
87*4882a593Smuzhiyun * Used for holding all information about a specific DIM instance.
88*4882a593Smuzhiyun *
89*4882a593Smuzhiyun * @state: Algorithm state (see below)
90*4882a593Smuzhiyun * @prev_stats: Measured rates from previous iteration (for comparison)
91*4882a593Smuzhiyun * @start_sample: Sampled data at start of current iteration
92*4882a593Smuzhiyun * @measuring_sample: A &dim_sample that is used to update the current events
93*4882a593Smuzhiyun * @work: Work to perform on action required
94*4882a593Smuzhiyun * @priv: A pointer to the struct that points to dim
95*4882a593Smuzhiyun * @profile_ix: Current moderation profile
96*4882a593Smuzhiyun * @mode: CQ period count mode
97*4882a593Smuzhiyun * @tune_state: Algorithm tuning state (see below)
98*4882a593Smuzhiyun * @steps_right: Number of steps taken towards higher moderation
99*4882a593Smuzhiyun * @steps_left: Number of steps taken towards lower moderation
100*4882a593Smuzhiyun * @tired: Parking depth counter
101*4882a593Smuzhiyun */
102*4882a593Smuzhiyun struct dim {
103*4882a593Smuzhiyun u8 state;
104*4882a593Smuzhiyun struct dim_stats prev_stats;
105*4882a593Smuzhiyun struct dim_sample start_sample;
106*4882a593Smuzhiyun struct dim_sample measuring_sample;
107*4882a593Smuzhiyun struct work_struct work;
108*4882a593Smuzhiyun void *priv;
109*4882a593Smuzhiyun u8 profile_ix;
110*4882a593Smuzhiyun u8 mode;
111*4882a593Smuzhiyun u8 tune_state;
112*4882a593Smuzhiyun u8 steps_right;
113*4882a593Smuzhiyun u8 steps_left;
114*4882a593Smuzhiyun u8 tired;
115*4882a593Smuzhiyun };
116*4882a593Smuzhiyun
/**
 * enum dim_cq_period_mode - Modes for CQ period count
 *
 * @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE
 * @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset)
 * @DIM_CQ_PERIOD_NUM_MODES: Number of modes
 */
enum dim_cq_period_mode {
	DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
	DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
	DIM_CQ_PERIOD_NUM_MODES
};
129*4882a593Smuzhiyun
/**
 * enum dim_state - DIM algorithm states
 *
 * These will determine if the algorithm is in a valid state to start an iteration.
 *
 * @DIM_START_MEASURE: This is the first iteration (also after applying a new profile)
 * @DIM_MEASURE_IN_PROGRESS: Algorithm is already in progress - check if
 * need to perform an action
 * @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure
 */
enum dim_state {
	DIM_START_MEASURE,
	DIM_MEASURE_IN_PROGRESS,
	DIM_APPLY_NEW_PROFILE,
};
145*4882a593Smuzhiyun
/**
 * enum dim_tune_state - DIM algorithm tune states
 *
 * These will determine which action the algorithm should perform.
 *
 * @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference
 * @DIM_PARKING_TIRED: Algorithm found a deep top point - don't exit if tired > 0
 * @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels
 * @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels
 */
enum dim_tune_state {
	DIM_PARKING_ON_TOP,
	DIM_PARKING_TIRED,
	DIM_GOING_RIGHT,
	DIM_GOING_LEFT,
};
162*4882a593Smuzhiyun
/**
 * enum dim_stats_state - DIM algorithm statistics states
 *
 * These will determine the verdict of the current iteration.
 *
 * @DIM_STATS_WORSE: Current iteration shows worse performance than before
 * @DIM_STATS_SAME: Current iteration shows the same performance as before
 * @DIM_STATS_BETTER: Current iteration shows better performance than before
 */
enum dim_stats_state {
	DIM_STATS_WORSE,
	DIM_STATS_SAME,
	DIM_STATS_BETTER,
};
177*4882a593Smuzhiyun
/**
 * enum dim_step_result - DIM algorithm step results
 *
 * These describe the result of a step.
 *
 * @DIM_STEPPED: Performed a regular step
 * @DIM_TOO_TIRED: Same kind of step was done multiple times - should go to
 * tired parking
 * @DIM_ON_EDGE: Stepped to the most left/right profile
 */
enum dim_step_result {
	DIM_STEPPED,
	DIM_TOO_TIRED,
	DIM_ON_EDGE,
};
193*4882a593Smuzhiyun
/**
 * dim_on_top - check if current state is a good place to stop (top location)
 * @dim: DIM context
 *
 * Check if the current profile is a good place to park at.
 * This will result in reducing the DIM checks frequency, as we assume we
 * probably shouldn't change profiles unless the traffic pattern changes.
 */
bool dim_on_top(struct dim *dim);
203*4882a593Smuzhiyun
/**
 * dim_turn - change profile altering direction
 * @dim: DIM context
 *
 * Go left if we were going right and vice-versa.
 * Do nothing if currently parking.
 */
void dim_turn(struct dim *dim);
212*4882a593Smuzhiyun
/**
 * dim_park_on_top - enter a parking state on a top location
 * @dim: DIM context
 *
 * Enter parking state.
 * Clear all movement history.
 */
void dim_park_on_top(struct dim *dim);
221*4882a593Smuzhiyun
/**
 * dim_park_tired - enter a tired parking state
 * @dim: DIM context
 *
 * Enter parking state.
 * Clear all movement history and cause DIM checks frequency to reduce.
 */
void dim_park_tired(struct dim *dim);
230*4882a593Smuzhiyun
/**
 * dim_calc_stats - calculate the difference between two samples
 * @start: start sample
 * @end: end sample
 * @curr_stats: delta between samples
 *
 * Calculate the delta between two samples (in data rates).
 * Takes into consideration counter wrap-around.
 */
void dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
		    struct dim_stats *curr_stats);
242*4882a593Smuzhiyun
243*4882a593Smuzhiyun /**
244*4882a593Smuzhiyun * dim_update_sample - set a sample's fields with given values
245*4882a593Smuzhiyun * @event_ctr: number of events to set
246*4882a593Smuzhiyun * @packets: number of packets to set
247*4882a593Smuzhiyun * @bytes: number of bytes to set
248*4882a593Smuzhiyun * @s: DIM sample
249*4882a593Smuzhiyun */
250*4882a593Smuzhiyun static inline void
dim_update_sample(u16 event_ctr,u64 packets,u64 bytes,struct dim_sample * s)251*4882a593Smuzhiyun dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s)
252*4882a593Smuzhiyun {
253*4882a593Smuzhiyun s->time = ktime_get();
254*4882a593Smuzhiyun s->pkt_ctr = packets;
255*4882a593Smuzhiyun s->byte_ctr = bytes;
256*4882a593Smuzhiyun s->event_ctr = event_ctr;
257*4882a593Smuzhiyun }
258*4882a593Smuzhiyun
259*4882a593Smuzhiyun /**
260*4882a593Smuzhiyun * dim_update_sample_with_comps - set a sample's fields with given
261*4882a593Smuzhiyun * values including the completion parameter
262*4882a593Smuzhiyun * @event_ctr: number of events to set
263*4882a593Smuzhiyun * @packets: number of packets to set
264*4882a593Smuzhiyun * @bytes: number of bytes to set
265*4882a593Smuzhiyun * @comps: number of completions to set
266*4882a593Smuzhiyun * @s: DIM sample
267*4882a593Smuzhiyun */
268*4882a593Smuzhiyun static inline void
dim_update_sample_with_comps(u16 event_ctr,u64 packets,u64 bytes,u64 comps,struct dim_sample * s)269*4882a593Smuzhiyun dim_update_sample_with_comps(u16 event_ctr, u64 packets, u64 bytes, u64 comps,
270*4882a593Smuzhiyun struct dim_sample *s)
271*4882a593Smuzhiyun {
272*4882a593Smuzhiyun dim_update_sample(event_ctr, packets, bytes, s);
273*4882a593Smuzhiyun s->comp_ctr = comps;
274*4882a593Smuzhiyun }
275*4882a593Smuzhiyun
/* Net DIM */
277*4882a593Smuzhiyun
278*4882a593Smuzhiyun /**
279*4882a593Smuzhiyun * net_dim_get_rx_moderation - provide a CQ moderation object for the given RX profile
280*4882a593Smuzhiyun * @cq_period_mode: CQ period mode
281*4882a593Smuzhiyun * @ix: Profile index
282*4882a593Smuzhiyun */
283*4882a593Smuzhiyun struct dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix);
284*4882a593Smuzhiyun
285*4882a593Smuzhiyun /**
286*4882a593Smuzhiyun * net_dim_get_def_rx_moderation - provide the default RX moderation
287*4882a593Smuzhiyun * @cq_period_mode: CQ period mode
288*4882a593Smuzhiyun */
289*4882a593Smuzhiyun struct dim_cq_moder net_dim_get_def_rx_moderation(u8 cq_period_mode);
290*4882a593Smuzhiyun
291*4882a593Smuzhiyun /**
292*4882a593Smuzhiyun * net_dim_get_tx_moderation - provide a CQ moderation object for the given TX profile
293*4882a593Smuzhiyun * @cq_period_mode: CQ period mode
294*4882a593Smuzhiyun * @ix: Profile index
295*4882a593Smuzhiyun */
296*4882a593Smuzhiyun struct dim_cq_moder net_dim_get_tx_moderation(u8 cq_period_mode, int ix);
297*4882a593Smuzhiyun
298*4882a593Smuzhiyun /**
299*4882a593Smuzhiyun * net_dim_get_def_tx_moderation - provide the default TX moderation
300*4882a593Smuzhiyun * @cq_period_mode: CQ period mode
301*4882a593Smuzhiyun */
302*4882a593Smuzhiyun struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode);
303*4882a593Smuzhiyun
/**
 * net_dim - main DIM algorithm entry point
 * @dim: DIM instance information
 * @end_sample: Current data measurement
 *
 * Called by the consumer.
 * This is the main logic of the algorithm, where data is processed in order
 * to decide on the next required action.
 */
void net_dim(struct dim *dim, struct dim_sample end_sample);
314*4882a593Smuzhiyun
/* RDMA DIM */
316*4882a593Smuzhiyun
/*
 * RDMA DIM profile:
 * the profile table must hold exactly RDMA_DIM_PARAMS_NUM_PROFILES entries.
 */
#define RDMA_DIM_PARAMS_NUM_PROFILES 9
#define RDMA_DIM_START_PROFILE 0
323*4882a593Smuzhiyun
324*4882a593Smuzhiyun /**
325*4882a593Smuzhiyun * rdma_dim - Runs the adaptive moderation.
326*4882a593Smuzhiyun * @dim: The moderation struct.
327*4882a593Smuzhiyun * @completions: The number of completions collected in this round.
328*4882a593Smuzhiyun *
329*4882a593Smuzhiyun * Each call to rdma_dim takes the latest amount of completions that
330*4882a593Smuzhiyun * have been collected and counts them as a new event.
331*4882a593Smuzhiyun * Once enough events have been collected the algorithm decides a new
332*4882a593Smuzhiyun * moderation level.
333*4882a593Smuzhiyun */
334*4882a593Smuzhiyun void rdma_dim(struct dim *dim, u64 completions);
335*4882a593Smuzhiyun
336*4882a593Smuzhiyun #endif /* DIM_H */
337