// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag.h"
#include "lag_mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

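/* Multipath offload is only possible once both PF netdevs have been
 * enslaved to the lag device and the eswitch multipath prerequisites
 * hold on both ports.
 */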
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
	if (!mlx5_lag_is_ready(ldev))
		return false;

	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
					 ldev->pf[MLX5_LAG_P2].dev);
}

static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
}

bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	ldev = mlx5_lag_dev_get(dev);
	res = ldev && __mlx5_lag_is_multipath(ldev);

	return res;
}

/**
 * mlx5_lag_set_port_affinity - set LAG port affinity
 * @ldev: lag device
 * @port: affinity to set:
 *        0 - set normal affinity.
 *        1 - set affinity to port 1.
 *        2 - set affinity to port 2.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
				       enum mlx5_lag_port_affinity port)
{
	struct lag_tracker tracker;

	if (!__mlx5_lag_is_multipath(ldev))
		return;

	switch (port) {
	case MLX5_LAG_NORMAL_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	case MLX5_LAG_P1_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
		break;
	case MLX5_LAG_P2_AFFINITY:
		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
		break;
	default:
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Invalid affinity port %d", port);
		return;
	}

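	/* Notify listeners on each PF that keeps TX enabled so they can
	 * react to the affinity change before the hardware LAG is modified.
	 */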
	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
					 MLX5_DEV_EVENT_PORT_AFFINITY,
					 (void *)0);

	mlx5_modify_lag(ldev, &tracker);
}

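/* Flush any FIB event work still queued on the multipath workqueue; this is
 * the flush callback passed to register_fib_notifier() below.
 */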
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

	flush_workqueue(mp->wq);
}

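/* FIB notifications arrive in atomic context, so the relevant notifier info
 * is copied into this work item and handled later from the workqueue.
 */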
struct mlx5_fib_event_work {
	struct work_struct work;
	struct mlx5_lag *ldev;
	unsigned long event;
	union {
		struct fib_entry_notifier_info fen_info;
		struct fib_nh_notifier_info fnh_info;
	};
};

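/* Track a single IPv4 route: on delete, stop tracking it; on add/replace,
 * pin affinity to the port of a single-path route, or activate multipath
 * LAG with normal affinity for a two-path route spanning both ports of the
 * same HCA.
 */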
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
				     unsigned long event,
				     struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;
	struct fib_nh *fib_nh0, *fib_nh1;
	unsigned int nhs;

	/* Handle delete event */
	if (event == FIB_EVENT_ENTRY_DEL) {
		/* stop tracking the route */
		if (mp->mfi == fi)
			mp->mfi = NULL;
		return;
	}

	/* Handle multipath entry with lower priority value */
	if (mp->mfi && mp->mfi != fi && fi->fib_priority >= mp->mfi->fib_priority)
		return;

	/* Handle add/replace event */
	nhs = fib_info_num_path(fi);
	if (nhs == 1) {
		if (__mlx5_lag_is_active(ldev)) {
			struct fib_nh *nh = fib_info_nh(fi, 0);
			struct net_device *nh_dev = nh->fib_nh_dev;
			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

			if (i < 0)
				return;

			i++;
			mlx5_lag_set_port_affinity(ldev, i);
		}

		mp->mfi = fi;
		return;
	}

	if (nhs != 2)
		return;

	/* Verify next hops are ports of the same HCA */
	fib_nh0 = fib_info_nh(fi, 0);
	fib_nh1 = fib_info_nh(fi, 1);
	if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
	    !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
			       "Multipath offload requires two ports of the same HCA\n");
		return;
	}

	/* First time we see multipath route */
	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
		struct lag_tracker tracker;

		tracker = ldev->tracker;
		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH);
	}

	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	mp->mfi = fi;
}

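/* React to next hop changes of the tracked route: on NH_DEL shift affinity
 * to the peer port, and on NH_ADD restore normal affinity once both next
 * hops are present again.
 */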
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
				       unsigned long event,
				       struct fib_nh *fib_nh,
				       struct fib_info *fi)
{
	struct lag_mp *mp = &ldev->lag_mp;

	/* Check the nh event is related to the route */
	if (!mp->mfi || mp->mfi != fi)
		return;

	/* nh added/removed */
	if (event == FIB_EVENT_NH_DEL) {
		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

		if (i >= 0) {
			i = (i + 1) % 2 + 1; /* peer port */
			mlx5_lag_set_port_affinity(ldev, i);
		}
	} else if (event == FIB_EVENT_NH_ADD &&
		   fib_info_num_path(fi) == 2) {
		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
	}
}

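/* Deferred work handler: dispatch the recorded FIB event under RTNL and
 * drop the fib_info reference taken when the event was queued.
 */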
static void mlx5_lag_fib_update(struct work_struct *work)
{
	struct mlx5_fib_event_work *fib_work =
		container_of(work, struct mlx5_fib_event_work, work);
	struct mlx5_lag *ldev = fib_work->ldev;
	struct fib_nh *fib_nh;

	/* Protect internal structures from changes */
	rtnl_lock();
	switch (fib_work->event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		mlx5_lag_fib_route_event(ldev, fib_work->event,
					 fib_work->fen_info.fi);
		fib_info_put(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fib_nh = fib_work->fnh_info.fib_nh;
		mlx5_lag_fib_nexthop_event(ldev,
					   fib_work->event,
					   fib_work->fnh_info.fib_nh,
					   fib_nh->nh_parent);
		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	}

	rtnl_unlock();
	kfree(fib_work);
}

static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
	struct mlx5_fib_event_work *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
	fib_work->ldev = ldev;
	fib_work->event = event;

	return fib_work;
}

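/* FIB notifier callback, may run in atomic context: filter out non-IPv4
 * events, routes using nexthop objects and route entries whose first next
 * hop is not on one of the LAG ports, then copy the notifier info, hold the
 * fib_info and defer the handling to the workqueue.
 */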
static int mlx5_lag_fib_event(struct notifier_block *nb,
			      unsigned long event,
			      void *ptr)
{
	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
	struct fib_notifier_info *info = ptr;
	struct mlx5_fib_event_work *fib_work;
	struct fib_entry_notifier_info *fen_info;
	struct fib_nh_notifier_info *fnh_info;
	struct net_device *fib_dev;
	struct fib_info *fi;

	if (info->family != AF_INET)
		return NOTIFY_DONE;

	if (!mlx5_lag_multipath_check_prereq(ldev))
		return NOTIFY_DONE;

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		fen_info = container_of(info, struct fib_entry_notifier_info,
					info);
		fi = fen_info->fi;
		if (fi->nh)
			return NOTIFY_DONE;
		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
		if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
		    fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
			return NOTIFY_DONE;
		}
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fen_info = *fen_info;
		/* Take reference on fib_info to prevent it from being
		 * freed while work is queued. Release it afterwards.
		 */
		fib_info_hold(fib_work->fen_info.fi);
		break;
	case FIB_EVENT_NH_ADD:
	case FIB_EVENT_NH_DEL:
		fnh_info = container_of(info, struct fib_nh_notifier_info,
					info);
		fib_work = mlx5_lag_init_fib_work(ldev, event);
		if (!fib_work)
			return NOTIFY_DONE;
		fib_work->fnh_info = *fnh_info;
		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
		break;
	default:
		return NOTIFY_DONE;
	}

	queue_work(mp->wq, &fib_work->work);

	return NOTIFY_DONE;
}

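/* Set up multipath route tracking: create the event workqueue and register
 * the FIB notifier; skipped if the notifier is already registered.
 */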
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;
	int err;

	/* always clear mfi, as it might become stale when a route delete event
	 * has been missed
	 */
	mp->mfi = NULL;

	if (mp->fib_nb.notifier_call)
		return 0;

	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
	if (!mp->wq)
		return -ENOMEM;

	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
	err = register_fib_notifier(&init_net, &mp->fib_nb,
				    mlx5_lag_fib_event_flush, NULL);
	if (err) {
		destroy_workqueue(mp->wq);
		mp->fib_nb.notifier_call = NULL;
	}

	return err;
}

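/* Tear down multipath route tracking: unregister the FIB notifier, destroy
 * the workqueue and drop the tracked route, if any.
 */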
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
	struct lag_mp *mp = &ldev->lag_mp;

	if (!mp->fib_nb.notifier_call)
		return;

	unregister_fib_notifier(&init_net, &mp->fib_nb);
	destroy_workqueue(mp->wq);
	mp->fib_nb.notifier_call = NULL;
	mp->mfi = NULL;
}