xref: /OK3568_Linux_fs/kernel/include/linux/dim.h (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
2*4882a593Smuzhiyun /* Copyright (c) 2019 Mellanox Technologies. */
3*4882a593Smuzhiyun 
4*4882a593Smuzhiyun #ifndef DIM_H
5*4882a593Smuzhiyun #define DIM_H
6*4882a593Smuzhiyun 
7*4882a593Smuzhiyun #include <linux/bits.h>
8*4882a593Smuzhiyun #include <linux/kernel.h>
9*4882a593Smuzhiyun #include <linux/module.h>
10*4882a593Smuzhiyun #include <linux/types.h>
11*4882a593Smuzhiyun #include <linux/workqueue.h>
12*4882a593Smuzhiyun 
/*
 * Number of events between DIM iterations.
 * Moderates how often the algorithm itself is run.
 */
#define DIM_NEVENTS 64
18*4882a593Smuzhiyun 
/*
 * Does the difference between two values justify taking an action?
 * A difference of more than 10% (relative to @ref) is considered significant.
 *
 * Evaluates to false when @ref is zero; that check also guards the division.
 * @ref is evaluated more than once, so it must be free of side effects.
 */
#define IS_SIGNIFICANT_DIFF(val, ref) \
	((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10))
25*4882a593Smuzhiyun 
/*
 * Calculate the gap between two counter values.
 * Takes wrap-around and the counter width (@bits) into consideration:
 * the difference is reduced modulo 2^@bits.
 */
#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) \
		& (BIT_ULL(bits) - 1))
32*4882a593Smuzhiyun 
33*4882a593Smuzhiyun /**
34*4882a593Smuzhiyun  * struct dim_cq_moder - Structure for CQ moderation values.
35*4882a593Smuzhiyun  * Used for communications between DIM and its consumer.
36*4882a593Smuzhiyun  *
37*4882a593Smuzhiyun  * @usec: CQ timer suggestion (by DIM)
38*4882a593Smuzhiyun  * @pkts: CQ packet counter suggestion (by DIM)
39*4882a593Smuzhiyun  * @comps: Completion counter
40*4882a593Smuzhiyun  * @cq_period_mode: CQ period count mode (from CQE/EQE)
41*4882a593Smuzhiyun  */
42*4882a593Smuzhiyun struct dim_cq_moder {
43*4882a593Smuzhiyun 	u16 usec;
44*4882a593Smuzhiyun 	u16 pkts;
45*4882a593Smuzhiyun 	u16 comps;
46*4882a593Smuzhiyun 	u8 cq_period_mode;
47*4882a593Smuzhiyun };
48*4882a593Smuzhiyun 
49*4882a593Smuzhiyun /**
50*4882a593Smuzhiyun  * struct dim_sample - Structure for DIM sample data.
51*4882a593Smuzhiyun  * Used for communications between DIM and its consumer.
52*4882a593Smuzhiyun  *
53*4882a593Smuzhiyun  * @time: Sample timestamp
54*4882a593Smuzhiyun  * @pkt_ctr: Number of packets
55*4882a593Smuzhiyun  * @byte_ctr: Number of bytes
56*4882a593Smuzhiyun  * @event_ctr: Number of events
57*4882a593Smuzhiyun  * @comp_ctr: Current completion counter
58*4882a593Smuzhiyun  */
59*4882a593Smuzhiyun struct dim_sample {
60*4882a593Smuzhiyun 	ktime_t time;
61*4882a593Smuzhiyun 	u32 pkt_ctr;
62*4882a593Smuzhiyun 	u32 byte_ctr;
63*4882a593Smuzhiyun 	u16 event_ctr;
64*4882a593Smuzhiyun 	u32 comp_ctr;
65*4882a593Smuzhiyun };
66*4882a593Smuzhiyun 
/**
 * struct dim_stats - Structure for DIM stats.
 * Used for holding current measured rates.
 *
 * @ppms: Packets per msec
 * @bpms: Bytes per msec
 * @epms: Events per msec
 * @cpms: Completions per msec
 * @cpe_ratio: Ratio of completions to events
 */
struct dim_stats {
	int ppms;
	int bpms;
	int epms;
	int cpms;
	int cpe_ratio;
};
84*4882a593Smuzhiyun 
85*4882a593Smuzhiyun /**
86*4882a593Smuzhiyun  * struct dim - Main structure for dynamic interrupt moderation (DIM).
87*4882a593Smuzhiyun  * Used for holding all information about a specific DIM instance.
88*4882a593Smuzhiyun  *
89*4882a593Smuzhiyun  * @state: Algorithm state (see below)
90*4882a593Smuzhiyun  * @prev_stats: Measured rates from previous iteration (for comparison)
91*4882a593Smuzhiyun  * @start_sample: Sampled data at start of current iteration
92*4882a593Smuzhiyun  * @measuring_sample: A &dim_sample that is used to update the current events
93*4882a593Smuzhiyun  * @work: Work to perform on action required
94*4882a593Smuzhiyun  * @priv: A pointer to the struct that points to dim
95*4882a593Smuzhiyun  * @profile_ix: Current moderation profile
96*4882a593Smuzhiyun  * @mode: CQ period count mode
97*4882a593Smuzhiyun  * @tune_state: Algorithm tuning state (see below)
98*4882a593Smuzhiyun  * @steps_right: Number of steps taken towards higher moderation
99*4882a593Smuzhiyun  * @steps_left: Number of steps taken towards lower moderation
100*4882a593Smuzhiyun  * @tired: Parking depth counter
101*4882a593Smuzhiyun  */
102*4882a593Smuzhiyun struct dim {
103*4882a593Smuzhiyun 	u8 state;
104*4882a593Smuzhiyun 	struct dim_stats prev_stats;
105*4882a593Smuzhiyun 	struct dim_sample start_sample;
106*4882a593Smuzhiyun 	struct dim_sample measuring_sample;
107*4882a593Smuzhiyun 	struct work_struct work;
108*4882a593Smuzhiyun 	void *priv;
109*4882a593Smuzhiyun 	u8 profile_ix;
110*4882a593Smuzhiyun 	u8 mode;
111*4882a593Smuzhiyun 	u8 tune_state;
112*4882a593Smuzhiyun 	u8 steps_right;
113*4882a593Smuzhiyun 	u8 steps_left;
114*4882a593Smuzhiyun 	u8 tired;
115*4882a593Smuzhiyun };
116*4882a593Smuzhiyun 
/**
 * enum dim_cq_period_mode - Modes for CQ period count
 *
 * @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE
 * @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset)
 * @DIM_CQ_PERIOD_NUM_MODES: Number of modes
 */
enum dim_cq_period_mode {
	DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
	DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
	DIM_CQ_PERIOD_NUM_MODES
};
129*4882a593Smuzhiyun 
/**
 * enum dim_state - DIM algorithm states
 *
 * These determine whether the algorithm is in a valid state to start an
 * iteration.
 *
 * @DIM_START_MEASURE: This is the first iteration (also after applying a new profile)
 * @DIM_MEASURE_IN_PROGRESS: Algorithm is already in progress - check if
 * need to perform an action
 * @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure
 */
enum dim_state {
	DIM_START_MEASURE,
	DIM_MEASURE_IN_PROGRESS,
	DIM_APPLY_NEW_PROFILE,
};
145*4882a593Smuzhiyun 
/**
 * enum dim_tune_state - DIM algorithm tune states
 *
 * These determine which action the algorithm should perform.
 *
 * @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference
 * @DIM_PARKING_TIRED: Algorithm found a deep top point - don't exit if tired > 0
 * @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels
 * @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels
 */
enum dim_tune_state {
	DIM_PARKING_ON_TOP,
	DIM_PARKING_TIRED,
	DIM_GOING_RIGHT,
	DIM_GOING_LEFT,
};
162*4882a593Smuzhiyun 
/**
 * enum dim_stats_state - DIM algorithm statistics states
 *
 * These determine the verdict of the current iteration.
 *
 * @DIM_STATS_WORSE: Current iteration shows worse performance than before
 * @DIM_STATS_SAME: Current iteration shows the same performance as before
 * @DIM_STATS_BETTER: Current iteration shows better performance than before
 */
enum dim_stats_state {
	DIM_STATS_WORSE,
	DIM_STATS_SAME,
	DIM_STATS_BETTER,
};
177*4882a593Smuzhiyun 
/**
 * enum dim_step_result - DIM algorithm step results
 *
 * These describe the result of a step.
 *
 * @DIM_STEPPED: Performed a regular step
 * @DIM_TOO_TIRED: Same kind of step was done multiple times - should go to
 * tired parking
 * @DIM_ON_EDGE: Stepped to the most left/right profile
 */
enum dim_step_result {
	DIM_STEPPED,
	DIM_TOO_TIRED,
	DIM_ON_EDGE,
};
193*4882a593Smuzhiyun 
/**
 * dim_on_top - check if current state is a good place to stop (top location)
 * @dim: DIM context
 *
 * Check if current profile is a good place to park at.
 * This will result in reducing the DIM checks frequency, as we assume we
 * probably shouldn't change profiles unless the traffic pattern has changed.
 */
bool dim_on_top(struct dim *dim);
203*4882a593Smuzhiyun 
/**
 * dim_turn - change profile altering direction
 * @dim: DIM context
 *
 * Go left if we were going right and vice-versa.
 * Do nothing if currently parking.
 */
void dim_turn(struct dim *dim);
212*4882a593Smuzhiyun 
/**
 * dim_park_on_top - enter a parking state on a top location
 * @dim: DIM context
 *
 * Enter parking state.
 * Clear all movement history.
 */
void dim_park_on_top(struct dim *dim);
221*4882a593Smuzhiyun 
/**
 * dim_park_tired - enter a tired parking state
 * @dim: DIM context
 *
 * Enter parking state.
 * Clear all movement history and cause DIM checks frequency to reduce.
 */
void dim_park_tired(struct dim *dim);
230*4882a593Smuzhiyun 
/**
 * dim_calc_stats - calculate the difference between two samples
 * @start: start sample
 * @end: end sample
 * @curr_stats: delta between samples
 *
 * Calculate the delta between two samples (in data rates).
 * Takes into consideration counter wrap-around.
 */
void dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
		    struct dim_stats *curr_stats);
242*4882a593Smuzhiyun 
243*4882a593Smuzhiyun /**
244*4882a593Smuzhiyun  *	dim_update_sample - set a sample's fields with given values
245*4882a593Smuzhiyun  *	@event_ctr: number of events to set
246*4882a593Smuzhiyun  *	@packets: number of packets to set
247*4882a593Smuzhiyun  *	@bytes: number of bytes to set
248*4882a593Smuzhiyun  *	@s: DIM sample
249*4882a593Smuzhiyun  */
250*4882a593Smuzhiyun static inline void
dim_update_sample(u16 event_ctr,u64 packets,u64 bytes,struct dim_sample * s)251*4882a593Smuzhiyun dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s)
252*4882a593Smuzhiyun {
253*4882a593Smuzhiyun 	s->time	     = ktime_get();
254*4882a593Smuzhiyun 	s->pkt_ctr   = packets;
255*4882a593Smuzhiyun 	s->byte_ctr  = bytes;
256*4882a593Smuzhiyun 	s->event_ctr = event_ctr;
257*4882a593Smuzhiyun }
258*4882a593Smuzhiyun 
259*4882a593Smuzhiyun /**
260*4882a593Smuzhiyun  *	dim_update_sample_with_comps - set a sample's fields with given
261*4882a593Smuzhiyun  *	values including the completion parameter
262*4882a593Smuzhiyun  *	@event_ctr: number of events to set
263*4882a593Smuzhiyun  *	@packets: number of packets to set
264*4882a593Smuzhiyun  *	@bytes: number of bytes to set
265*4882a593Smuzhiyun  *	@comps: number of completions to set
266*4882a593Smuzhiyun  *	@s: DIM sample
267*4882a593Smuzhiyun  */
268*4882a593Smuzhiyun static inline void
dim_update_sample_with_comps(u16 event_ctr,u64 packets,u64 bytes,u64 comps,struct dim_sample * s)269*4882a593Smuzhiyun dim_update_sample_with_comps(u16 event_ctr, u64 packets, u64 bytes, u64 comps,
270*4882a593Smuzhiyun 			     struct dim_sample *s)
271*4882a593Smuzhiyun {
272*4882a593Smuzhiyun 	dim_update_sample(event_ctr, packets, bytes, s);
273*4882a593Smuzhiyun 	s->comp_ctr = comps;
274*4882a593Smuzhiyun }
275*4882a593Smuzhiyun 
276*4882a593Smuzhiyun /* Net DIM */
277*4882a593Smuzhiyun 
278*4882a593Smuzhiyun /**
279*4882a593Smuzhiyun  *	net_dim_get_rx_moderation - provide a CQ moderation object for the given RX profile
280*4882a593Smuzhiyun  *	@cq_period_mode: CQ period mode
281*4882a593Smuzhiyun  *	@ix: Profile index
282*4882a593Smuzhiyun  */
283*4882a593Smuzhiyun struct dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix);
284*4882a593Smuzhiyun 
285*4882a593Smuzhiyun /**
286*4882a593Smuzhiyun  *	net_dim_get_def_rx_moderation - provide the default RX moderation
287*4882a593Smuzhiyun  *	@cq_period_mode: CQ period mode
288*4882a593Smuzhiyun  */
289*4882a593Smuzhiyun struct dim_cq_moder net_dim_get_def_rx_moderation(u8 cq_period_mode);
290*4882a593Smuzhiyun 
291*4882a593Smuzhiyun /**
292*4882a593Smuzhiyun  *	net_dim_get_tx_moderation - provide a CQ moderation object for the given TX profile
293*4882a593Smuzhiyun  *	@cq_period_mode: CQ period mode
294*4882a593Smuzhiyun  *	@ix: Profile index
295*4882a593Smuzhiyun  */
296*4882a593Smuzhiyun struct dim_cq_moder net_dim_get_tx_moderation(u8 cq_period_mode, int ix);
297*4882a593Smuzhiyun 
298*4882a593Smuzhiyun /**
299*4882a593Smuzhiyun  *	net_dim_get_def_tx_moderation - provide the default TX moderation
300*4882a593Smuzhiyun  *	@cq_period_mode: CQ period mode
301*4882a593Smuzhiyun  */
302*4882a593Smuzhiyun struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode);
303*4882a593Smuzhiyun 
/**
 * net_dim - main DIM algorithm entry point
 * @dim: DIM instance information
 * @end_sample: Current data measurement
 *
 * Called by the consumer.
 * This is the main logic of the algorithm, where data is processed in order
 * to decide on the next required action.
 */
void net_dim(struct dim *dim, struct dim_sample end_sample);
314*4882a593Smuzhiyun 
/* RDMA DIM */

/*
 * RDMA DIM profile:
 * the profile table must have exactly RDMA_DIM_PARAMS_NUM_PROFILES entries.
 */
#define RDMA_DIM_PARAMS_NUM_PROFILES 9
#define RDMA_DIM_START_PROFILE 0
323*4882a593Smuzhiyun 
324*4882a593Smuzhiyun /**
325*4882a593Smuzhiyun  * rdma_dim - Runs the adaptive moderation.
326*4882a593Smuzhiyun  * @dim: The moderation struct.
327*4882a593Smuzhiyun  * @completions: The number of completions collected in this round.
328*4882a593Smuzhiyun  *
329*4882a593Smuzhiyun  * Each call to rdma_dim takes the latest amount of completions that
330*4882a593Smuzhiyun  * have been collected and counts them as a new event.
331*4882a593Smuzhiyun  * Once enough events have been collected the algorithm decides a new
332*4882a593Smuzhiyun  * moderation level.
333*4882a593Smuzhiyun  */
334*4882a593Smuzhiyun void rdma_dim(struct dim *dim, u64 completions);
335*4882a593Smuzhiyun 
336*4882a593Smuzhiyun #endif /* DIM_H */
337