/* kernel/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c (revision 4882a59341e53eb6f0b4789bf948001014eff981) */
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag.h"
#include "lag_mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

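/*
 * Multipath LAG may only be offloaded when the LAG is ready and both
 * eswitches satisfy the multipath prerequisites.
 */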
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!mlx5_lag_is_ready(ldev))
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
					 ldev->pf[MLX5_LAG_P2].dev);
}

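/* Lockless check of the multipath flag; callers must hold a valid ldev. */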
static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
}

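/*
 * mlx5_lag_is_multipath() - report whether the LAG associated with @dev is
 * currently in multipath mode. Returns false when @dev is not part of a LAG.
 */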
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	ldev = mlx5_lag_dev_get(dev);
	res  = ldev && __mlx5_lag_is_multipath(ldev);

	return res;
}

/**
 * mlx5_lag_set_port_affinity - set lag port affinity
 * @ldev: lag device
 * @port: new affinity:
 *        0 - set normal affinity,
 *        1 - set affinity to port 1,
 *        2 - set affinity to port 2.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	struct lag_tracker tracker;

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Invalid affinity port %d", port);
		return;
	}

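	/* Notify consumers on each port that still has TX enabled that the
	 * port affinity is changing.
	 */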
	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

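/*
 * Drain the multipath workqueue so every queued FIB event has been handled;
 * invoked by the FIB notifier core (e.g. when the notifier is unregistered).
 */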
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

	flush_workqueue(mp->wq);
}

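/* A FIB event detached from notifier context, to be processed from the workqueue. */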
struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

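/*
 * Track at most one fib_info: a route with two nexthops spanning both ports
 * (re)activates multipath LAG with normal affinity, while a single-nexthop
 * route pins TX affinity to the port its nexthop uses.
 */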
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
				     unsigned long event,
				     struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;
	struct fib_nh *fib_nh0, *fib_nh1;
	unsigned int nhs;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* Stop tracking the route */
		if (mp->mfi == fi)
			mp->mfi = NULL;
		return;
	}

	/* Ignore routes whose priority (metric) is higher than or equal to
	 * the tracked route's, unless this is the tracked route itself
	 */
	if (mp->mfi && mp->mfi != fi && fi->fib_priority >= mp->mfi->fib_priority)
		return;

	/* Handle add/replace event */
	nhs = fib_info_num_path(fi);
	if (nhs == 1) {
		if (__mlx5_lag_is_active(ldev)) {
			struct fib_nh *nh = fib_info_nh(fi, 0);
			struct net_device *nh_dev = nh->fib_nh_dev;
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

			if (i < 0)
				return;

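			/* mlx5_lag_dev_get_netdev_idx() is 0-based while
			 * affinity values are 1-based (0 = normal affinity)
			 */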
			i++;
			mlx5_lag_set_port_affinity(ldev, i);
		}

		mp->mfi = fi;
		return;
	}

	if (nhs != 2)
		return;

	/* Verify next hops are ports of the same HCA */
	fib_nh0 = fib_info_nh(fi, 0);
	fib_nh1 = fib_info_nh(fi, 1);
	if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
	    !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload requires two ports of the same HCA\n");
		return;
	}

	/* First time we see a multipath route */
	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mp->mfi = fi;
}

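/*
 * Nexthop add/del on the tracked route: losing a nexthop pins affinity to
 * the peer port; regaining the second nexthop restores normal affinity.
 */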
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check that the nh event is related to the tracked route */
	if (!mp->mfi || mp->mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			i = (i + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

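/* Deferred handler for FIB events; runs from the multipath workqueue under RTNL. */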
static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 fib_work->fen_info.fi);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev,
					   fib_work->event,
					   fib_work->fnh_info.fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

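/* GFP_ATOMIC: allocated from the FIB notifier chain, which may run in atomic context. */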
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

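/*
 * FIB notifier callback: filter events, then defer the real work to the
 * workqueue, holding a fib_info reference until the work item releases it.
 */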
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct net_device *fib_dev;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->nh)
			return NOTIFY_DONE;
		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
		if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
		    fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
			return NOTIFY_DONE;
		}
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}

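/*
 * Register the FIB notifier once per LAG and create the single-threaded
 * workqueue that serializes FIB event processing.
 */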
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	/* always clear mfi, as it might become stale when a route delete event
	 * has been missed
	 */
	mp->mfi = NULL;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
	if (!mp->wq)
		return -ENOMEM;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err) {
		destroy_workqueue(mp->wq);
		mp->fib_nb.notifier_call = NULL;
	}

	return err;
}

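/* Counterpart of mlx5_lag_mp_init(); a no-op if the notifier was never registered. */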
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
	mp->mfi = NULL;
}