xref: /OK3568_Linux_fs/kernel/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3 
4 #include <net/netfilter/nf_conntrack.h>
5 #include <net/netfilter/nf_conntrack_core.h>
6 #include <net/netfilter/nf_conntrack_zones.h>
7 #include <net/netfilter/nf_conntrack_labels.h>
8 #include <net/netfilter/nf_conntrack_helper.h>
9 #include <net/netfilter/nf_conntrack_acct.h>
10 #include <uapi/linux/tc_act/tc_pedit.h>
11 #include <net/tc_act/tc_ct.h>
12 #include <net/flow_offload.h>
13 #include <net/netfilter/nf_flow_table.h>
14 #include <linux/workqueue.h>
15 #include <linux/refcount.h>
16 #include <linux/xarray.h>
17 
18 #include "lib/fs_chains.h"
19 #include "en/tc_ct.h"
20 #include "en/mod_hdr.h"
21 #include "en/mapping.h"
22 #include "en.h"
23 #include "en_tc.h"
24 #include "en_rep.h"
25 
26 #define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
27 #define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
28 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
29 #define MLX5_CT_STATE_TRK_BIT BIT(2)
30 #define MLX5_CT_STATE_NAT_BIT BIT(3)
31 
32 #define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
33 #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
34 #define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX
35 
36 #define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
37 #define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
38 
39 #define ct_dbg(fmt, args...)\
40 	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
41 
42 struct mlx5_tc_ct_priv {
43 	struct mlx5_core_dev *dev;
44 	const struct net_device *netdev;
45 	struct mod_hdr_tbl *mod_hdr_tbl;
46 	struct idr fte_ids;
47 	struct xarray tuple_ids;
48 	struct rhashtable zone_ht;
49 	struct rhashtable ct_tuples_ht;
50 	struct rhashtable ct_tuples_nat_ht;
51 	struct mlx5_flow_table *ct;
52 	struct mlx5_flow_table *ct_nat;
53 	struct mlx5_flow_table *post_ct;
54 	struct mutex control_lock; /* guards parallel adds/dels */
55 	struct mapping_ctx *zone_mapping;
56 	struct mapping_ctx *labels_mapping;
57 	enum mlx5_flow_namespace_type ns_type;
58 	struct mlx5_fs_chains *chains;
59 	spinlock_t ht_lock; /* protects ft entries */
60 };
61 
62 struct mlx5_ct_flow {
63 	struct mlx5_flow_attr *pre_ct_attr;
64 	struct mlx5_flow_attr *post_ct_attr;
65 	struct mlx5_flow_handle *pre_ct_rule;
66 	struct mlx5_flow_handle *post_ct_rule;
67 	struct mlx5_ct_ft *ft;
68 	u32 fte_id;
69 	u32 chain_mapping;
70 };
71 
72 struct mlx5_ct_zone_rule {
73 	struct mlx5_flow_handle *rule;
74 	struct mlx5e_mod_hdr_handle *mh;
75 	struct mlx5_flow_attr *attr;
76 	bool nat;
77 };
78 
79 struct mlx5_tc_ct_pre {
80 	struct mlx5_flow_table *ft;
81 	struct mlx5_flow_group *flow_grp;
82 	struct mlx5_flow_group *miss_grp;
83 	struct mlx5_flow_handle *flow_rule;
84 	struct mlx5_flow_handle *miss_rule;
85 	struct mlx5_modify_hdr *modify_hdr;
86 };
87 
88 struct mlx5_ct_ft {
89 	struct rhash_head node;
90 	u16 zone;
91 	u32 zone_restore_id;
92 	refcount_t refcount;
93 	struct nf_flowtable *nf_ft;
94 	struct mlx5_tc_ct_priv *ct_priv;
95 	struct rhashtable ct_entries_ht;
96 	struct mlx5_tc_ct_pre pre_ct;
97 	struct mlx5_tc_ct_pre pre_ct_nat;
98 };
99 
100 struct mlx5_ct_tuple {
101 	u16 addr_type;
102 	__be16 n_proto;
103 	u8 ip_proto;
104 	struct {
105 		union {
106 			__be32 src_v4;
107 			struct in6_addr src_v6;
108 		};
109 		union {
110 			__be32 dst_v4;
111 			struct in6_addr dst_v6;
112 		};
113 	} ip;
114 	struct {
115 		__be16 src;
116 		__be16 dst;
117 	} port;
118 
119 	u16 zone;
120 };
121 
122 struct mlx5_ct_counter {
123 	struct mlx5_fc *counter;
124 	refcount_t refcount;
125 	bool is_shared;
126 };
127 
128 enum {
129 	MLX5_CT_ENTRY_FLAG_VALID,
130 };
131 
132 struct mlx5_ct_entry {
133 	struct rhash_head node;
134 	struct rhash_head tuple_node;
135 	struct rhash_head tuple_nat_node;
136 	struct mlx5_ct_counter *counter;
137 	unsigned long cookie;
138 	unsigned long restore_cookie;
139 	struct mlx5_ct_tuple tuple;
140 	struct mlx5_ct_tuple tuple_nat;
141 	struct mlx5_ct_zone_rule zone_rules[2];
142 
143 	struct mlx5_tc_ct_priv *ct_priv;
144 	struct work_struct work;
145 
146 	refcount_t refcnt;
147 	unsigned long flags;
148 };
149 
150 static const struct rhashtable_params cts_ht_params = {
151 	.head_offset = offsetof(struct mlx5_ct_entry, node),
152 	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
153 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
154 	.automatic_shrinking = true,
155 	.min_size = 16 * 1024,
156 };
157 
158 static const struct rhashtable_params zone_params = {
159 	.head_offset = offsetof(struct mlx5_ct_ft, node),
160 	.key_offset = offsetof(struct mlx5_ct_ft, zone),
161 	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
162 	.automatic_shrinking = true,
163 };
164 
165 static const struct rhashtable_params tuples_ht_params = {
166 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
167 	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
168 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
169 	.automatic_shrinking = true,
170 	.min_size = 16 * 1024,
171 };
172 
173 static const struct rhashtable_params tuples_nat_ht_params = {
174 	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
175 	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
176 	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
177 	.automatic_shrinking = true,
178 	.min_size = 16 * 1024,
179 };
180 
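/* The entry is linked into ct_tuples_nat_ht only when its NAT tuple differs
 * from the original tuple, so a linked tuple_nat_node implies NAT.
 */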
181 static bool
182 mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
183 {
184 	return !!(entry->tuple_nat_node.next);
185 }
186 
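/* Map the 128-bit conntrack labels to a compact id that fits the labels
 * register. All-zero labels map to id 0 without allocating a mapping.
 */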
187 static int
188 mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
189 		       u32 *labels, u32 *id)
190 {
191 	if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
192 		*id = 0;
193 		return 0;
194 	}
195 
196 	if (mapping_add(ct_priv->labels_mapping, labels, id))
197 		return -EOPNOTSUPP;
198 
199 	return 0;
200 }
201 
202 static void
203 mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
204 {
205 	if (id)
206 		mapping_remove(ct_priv->labels_mapping, id);
207 }
208 
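/* Extract the connection tuple (eth proto, IP proto, addresses, ports) from
 * the flow_rule match keys. Only TCP/UDP over IPv4/IPv6 is offloadable.
 */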
209 static int
210 mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
211 {
212 	struct flow_match_control control;
213 	struct flow_match_basic basic;
214 
215 	flow_rule_match_basic(rule, &basic);
216 	flow_rule_match_control(rule, &control);
217 
218 	tuple->n_proto = basic.key->n_proto;
219 	tuple->ip_proto = basic.key->ip_proto;
220 	tuple->addr_type = control.key->addr_type;
221 
222 	if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
223 		struct flow_match_ipv4_addrs match;
224 
225 		flow_rule_match_ipv4_addrs(rule, &match);
226 		tuple->ip.src_v4 = match.key->src;
227 		tuple->ip.dst_v4 = match.key->dst;
228 	} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
229 		struct flow_match_ipv6_addrs match;
230 
231 		flow_rule_match_ipv6_addrs(rule, &match);
232 		tuple->ip.src_v6 = match.key->src;
233 		tuple->ip.dst_v6 = match.key->dst;
234 	} else {
235 		return -EOPNOTSUPP;
236 	}
237 
238 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
239 		struct flow_match_ports match;
240 
241 		flow_rule_match_ports(rule, &match);
242 		switch (tuple->ip_proto) {
243 		case IPPROTO_TCP:
244 		case IPPROTO_UDP:
245 			tuple->port.src = match.key->src;
246 			tuple->port.dst = match.key->dst;
247 			break;
248 		default:
249 			return -EOPNOTSUPP;
250 		}
251 	} else {
252 		return -EOPNOTSUPP;
253 	}
254 
255 	return 0;
256 }
257 
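/* Apply the rule's mangle (NAT rewrite) actions on top of the original tuple
 * to derive the post-NAT tuple.
 */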
258 static int
259 mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
260 			     struct flow_rule *rule)
261 {
262 	struct flow_action *flow_action = &rule->action;
263 	struct flow_action_entry *act;
264 	u32 offset, val, ip6_offset;
265 	int i;
266 
267 	flow_action_for_each(i, act, flow_action) {
268 		if (act->id != FLOW_ACTION_MANGLE)
269 			continue;
270 
271 		offset = act->mangle.offset;
272 		val = act->mangle.val;
273 		switch (act->mangle.htype) {
274 		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
275 			if (offset == offsetof(struct iphdr, saddr))
276 				tuple->ip.src_v4 = cpu_to_be32(val);
277 			else if (offset == offsetof(struct iphdr, daddr))
278 				tuple->ip.dst_v4 = cpu_to_be32(val);
279 			else
280 				return -EOPNOTSUPP;
281 			break;
282 
283 		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
284 			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
285 			ip6_offset /= 4;
286 			if (ip6_offset < 4)
287 				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
288 			else if (ip6_offset < 8)
289 				tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
290 			else
291 				return -EOPNOTSUPP;
292 			break;
293 
294 		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
295 			if (offset == offsetof(struct tcphdr, source))
296 				tuple->port.src = cpu_to_be16(val);
297 			else if (offset == offsetof(struct tcphdr, dest))
298 				tuple->port.dst = cpu_to_be16(val);
299 			else
300 				return -EOPNOTSUPP;
301 			break;
302 
303 		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
304 			if (offset == offsetof(struct udphdr, source))
305 				tuple->port.src = cpu_to_be16(val);
306 			else if (offset == offsetof(struct udphdr, dest))
307 				tuple->port.dst = cpu_to_be16(val);
308 			else
309 				return -EOPNOTSUPP;
310 			break;
311 
312 		default:
313 			return -EOPNOTSUPP;
314 		}
315 	}
316 
317 	return 0;
318 }
319 
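/* Translate the flow_rule match keys into outer header match criteria and
 * values of the mlx5 flow spec.
 */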
320 static int
321 mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
322 			   struct flow_rule *rule)
323 {
324 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
325 				       outer_headers);
326 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
327 				       outer_headers);
328 	u16 addr_type = 0;
329 	u8 ip_proto = 0;
330 
331 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
332 		struct flow_match_basic match;
333 
334 		flow_rule_match_basic(rule, &match);
335 
336 		mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
337 				       headers_v);
338 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
339 			 match.mask->ip_proto);
340 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
341 			 match.key->ip_proto);
342 
343 		ip_proto = match.key->ip_proto;
344 	}
345 
346 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
347 		struct flow_match_control match;
348 
349 		flow_rule_match_control(rule, &match);
350 		addr_type = match.key->addr_type;
351 	}
352 
353 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
354 		struct flow_match_ipv4_addrs match;
355 
356 		flow_rule_match_ipv4_addrs(rule, &match);
357 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
358 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
359 		       &match.mask->src, sizeof(match.mask->src));
360 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
361 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
362 		       &match.key->src, sizeof(match.key->src));
363 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
364 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
365 		       &match.mask->dst, sizeof(match.mask->dst));
366 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
367 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
368 		       &match.key->dst, sizeof(match.key->dst));
369 	}
370 
371 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
372 		struct flow_match_ipv6_addrs match;
373 
374 		flow_rule_match_ipv6_addrs(rule, &match);
375 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
376 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
377 		       &match.mask->src, sizeof(match.mask->src));
378 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
379 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
380 		       &match.key->src, sizeof(match.key->src));
381 
382 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
383 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
384 		       &match.mask->dst, sizeof(match.mask->dst));
385 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
386 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
387 		       &match.key->dst, sizeof(match.key->dst));
388 	}
389 
390 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
391 		struct flow_match_ports match;
392 
393 		flow_rule_match_ports(rule, &match);
394 		switch (ip_proto) {
395 		case IPPROTO_TCP:
396 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
397 				 tcp_sport, ntohs(match.mask->src));
398 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
399 				 tcp_sport, ntohs(match.key->src));
400 
401 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
402 				 tcp_dport, ntohs(match.mask->dst));
403 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
404 				 tcp_dport, ntohs(match.key->dst));
405 			break;
406 
407 		case IPPROTO_UDP:
408 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
409 				 udp_sport, ntohs(match.mask->src));
410 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
411 				 udp_sport, ntohs(match.key->src));
412 
413 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
414 				 udp_dport, ntohs(match.mask->dst));
415 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
416 				 udp_dport, ntohs(match.key->dst));
417 			break;
418 		default:
419 			break;
420 		}
421 	}
422 
423 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
424 		struct flow_match_tcp match;
425 
426 		flow_rule_match_tcp(rule, &match);
427 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
428 			 ntohs(match.mask->flags));
429 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
430 			 ntohs(match.key->flags));
431 	}
432 
433 	return 0;
434 }
435 
436 static void
437 mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
438 {
439 	if (entry->counter->is_shared &&
440 	    !refcount_dec_and_test(&entry->counter->refcount))
441 		return;
442 
443 	mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
444 	kfree(entry->counter);
445 }
446 
447 static void
448 mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
449 			  struct mlx5_ct_entry *entry,
450 			  bool nat)
451 {
452 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
453 	struct mlx5_flow_attr *attr = zone_rule->attr;
454 
455 	ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
456 
457 	mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
458 	mlx5e_mod_hdr_detach(ct_priv->dev,
459 			     ct_priv->mod_hdr_tbl, zone_rule->mh);
460 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
461 	kfree(attr);
462 }
463 
464 static void
465 mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
466 			   struct mlx5_ct_entry *entry)
467 {
468 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
469 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
470 }
471 
472 static struct flow_action_entry *
473 mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
474 {
475 	struct flow_action *flow_action = &flow_rule->action;
476 	struct flow_action_entry *act;
477 	int i;
478 
479 	flow_action_for_each(i, act, flow_action) {
480 		if (act->id == FLOW_ACTION_CT_METADATA)
481 			return act;
482 	}
483 
484 	return NULL;
485 }
486 
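/* Write the ct entry metadata (ct_state, mark, labels id, zone restore id)
 * into the mapped registers using modify-header actions.
 */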
487 static int
488 mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
489 			       struct mlx5e_tc_mod_hdr_acts *mod_acts,
490 			       u8 ct_state,
491 			       u32 mark,
492 			       u32 labels_id,
493 			       u8 zone_restore_id)
494 {
495 	enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
496 	struct mlx5_core_dev *dev = ct_priv->dev;
497 	int err;
498 
499 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
500 					CTSTATE_TO_REG, ct_state);
501 	if (err)
502 		return err;
503 
504 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
505 					MARK_TO_REG, mark);
506 	if (err)
507 		return err;
508 
509 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
510 					LABELS_TO_REG, labels_id);
511 	if (err)
512 		return err;
513 
514 	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
515 					ZONE_RESTORE_TO_REG, zone_restore_id);
516 	if (err)
517 		return err;
518 
519 	/* Make another copy of zone id in reg_b for
520 	 * NIC rx flows since we don't copy reg_c1 to
521 	 * reg_b upon miss.
522 	 */
523 	if (ns != MLX5_FLOW_NAMESPACE_FDB) {
524 		err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
525 						NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
526 		if (err)
527 			return err;
528 	}
529 	return 0;
530 }
531 
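/* Translate a single mangle (pedit) action into one hardware set_action_in
 * modify-header entry, picking the field by header type and offset.
 */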
532 static int
533 mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
534 				   char *modact)
535 {
536 	u32 offset = act->mangle.offset, field;
537 
538 	switch (act->mangle.htype) {
539 	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
540 		MLX5_SET(set_action_in, modact, length, 0);
541 		if (offset == offsetof(struct iphdr, saddr))
542 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
543 		else if (offset == offsetof(struct iphdr, daddr))
544 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
545 		else
546 			return -EOPNOTSUPP;
547 		break;
548 
549 	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
550 		MLX5_SET(set_action_in, modact, length, 0);
551 		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
552 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
553 		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
554 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
555 		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
556 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
557 		else if (offset == offsetof(struct ipv6hdr, saddr))
558 			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
559 		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
560 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
561 		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
562 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
563 		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
564 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
565 		else if (offset == offsetof(struct ipv6hdr, daddr))
566 			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
567 		else
568 			return -EOPNOTSUPP;
569 		break;
570 
571 	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
572 		MLX5_SET(set_action_in, modact, length, 16);
573 		if (offset == offsetof(struct tcphdr, source))
574 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
575 		else if (offset == offsetof(struct tcphdr, dest))
576 			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
577 		else
578 			return -EOPNOTSUPP;
579 		break;
580 
581 	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
582 		MLX5_SET(set_action_in, modact, length, 16);
583 		if (offset == offsetof(struct udphdr, source))
584 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
585 		else if (offset == offsetof(struct udphdr, dest))
586 			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
587 		else
588 			return -EOPNOTSUPP;
589 		break;
590 
591 	default:
592 		return -EOPNOTSUPP;
593 	}
594 
595 	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
596 	MLX5_SET(set_action_in, modact, offset, 0);
597 	MLX5_SET(set_action_in, modact, field, field);
598 	MLX5_SET(set_action_in, modact, data, act->mangle.val);
599 
600 	return 0;
601 }
602 
603 static int
604 mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
605 			    struct flow_rule *flow_rule,
606 			    struct mlx5e_tc_mod_hdr_acts *mod_acts)
607 {
608 	struct flow_action *flow_action = &flow_rule->action;
609 	struct mlx5_core_dev *mdev = ct_priv->dev;
610 	struct flow_action_entry *act;
611 	size_t action_size;
612 	char *modact;
613 	int err, i;
614 
615 	action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
616 
617 	flow_action_for_each(i, act, flow_action) {
618 		switch (act->id) {
619 		case FLOW_ACTION_MANGLE: {
620 			err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
621 						    mod_acts);
622 			if (err)
623 				return err;
624 
625 			modact = mod_acts->actions +
626 				 mod_acts->num_actions * action_size;
627 
628 			err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
629 			if (err)
630 				return err;
631 
632 			mod_acts->num_actions++;
633 		}
634 		break;
635 
636 		case FLOW_ACTION_CT_METADATA:
637 			/* Handled earlier */
638 			continue;
639 		default:
640 			return -EOPNOTSUPP;
641 		}
642 	}
643 
644 	return 0;
645 }
646 
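/* Build and attach the modify header for a ct entry rule: map the ct labels,
 * add NAT rewrites if needed, then set the ct metadata registers.
 */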
647 static int
648 mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
649 				struct mlx5_flow_attr *attr,
650 				struct flow_rule *flow_rule,
651 				struct mlx5e_mod_hdr_handle **mh,
652 				u8 zone_restore_id, bool nat)
653 {
654 	struct mlx5e_tc_mod_hdr_acts mod_acts = {};
655 	struct flow_action_entry *meta;
656 	u16 ct_state = 0;
657 	int err;
658 
659 	meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
660 	if (!meta)
661 		return -EOPNOTSUPP;
662 
663 	err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
664 				     &attr->ct_attr.ct_labels_id);
665 	if (err)
666 		return -EOPNOTSUPP;
667 	if (nat) {
668 		err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
669 						  &mod_acts);
670 		if (err)
671 			goto err_mapping;
672 
673 		ct_state |= MLX5_CT_STATE_NAT_BIT;
674 	}
675 
676 	ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
677 	err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
678 					     ct_state,
679 					     meta->ct_metadata.mark,
680 					     attr->ct_attr.ct_labels_id,
681 					     zone_restore_id);
682 	if (err)
683 		goto err_mapping;
684 
685 	*mh = mlx5e_mod_hdr_attach(ct_priv->dev,
686 				   ct_priv->mod_hdr_tbl,
687 				   ct_priv->ns_type,
688 				   &mod_acts);
689 	if (IS_ERR(*mh)) {
690 		err = PTR_ERR(*mh);
691 		goto err_mapping;
692 	}
693 	attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
694 
695 	dealloc_mod_hdr_actions(&mod_acts);
696 	return 0;
697 
698 err_mapping:
699 	dealloc_mod_hdr_actions(&mod_acts);
700 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
701 	return err;
702 }
703 
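/* Offload one direction of a ct entry: match the tuple and zone in the CT
 * (or CT-NAT) table, apply the modify header and counter, and forward to
 * the post_ct table.
 */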
704 static int
705 mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
706 			  struct flow_rule *flow_rule,
707 			  struct mlx5_ct_entry *entry,
708 			  bool nat, u8 zone_restore_id)
709 {
710 	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
711 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
712 	struct mlx5_flow_spec *spec = NULL;
713 	struct mlx5_flow_attr *attr;
714 	int err;
715 
716 	zone_rule->nat = nat;
717 
718 	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
719 	if (!spec)
720 		return -ENOMEM;
721 
722 	attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
723 	if (!attr) {
724 		err = -ENOMEM;
725 		goto err_attr;
726 	}
727 
728 	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
729 					      &zone_rule->mh,
730 					      zone_restore_id, nat);
731 	if (err) {
732 		ct_dbg("Failed to create ct entry mod hdr");
733 		goto err_mod_hdr;
734 	}
735 
736 	attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
737 		       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
738 		       MLX5_FLOW_CONTEXT_ACTION_COUNT;
739 	attr->dest_chain = 0;
740 	attr->dest_ft = ct_priv->post_ct;
741 	attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
742 	attr->outer_match_level = MLX5_MATCH_L4;
743 	attr->counter = entry->counter->counter;
744 	attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
745 
746 	mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
747 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
748 				    entry->tuple.zone & MLX5_CT_ZONE_MASK,
749 				    MLX5_CT_ZONE_MASK);
750 
751 	zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
752 	if (IS_ERR(zone_rule->rule)) {
753 		err = PTR_ERR(zone_rule->rule);
754 		ct_dbg("Failed to add ct entry rule, nat: %d", nat);
755 		goto err_rule;
756 	}
757 
758 	zone_rule->attr = attr;
759 
760 	kfree(spec);
761 	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
762 
763 	return 0;
764 
765 err_rule:
766 	mlx5e_mod_hdr_detach(ct_priv->dev,
767 			     ct_priv->mod_hdr_tbl, zone_rule->mh);
768 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
769 err_mod_hdr:
770 	kfree(attr);
771 err_attr:
772 	kfree(spec);
773 	return err;
774 }
775 
776 static bool
777 mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
778 {
779 	return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
780 }
781 
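/* Look the tuple up in the regular and NAT tuple tables. Returns a
 * referenced valid entry, NULL if none exists, or ERR_PTR(-EINVAL) if an
 * entry exists but is invalid or already being freed.
 */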
782 static struct mlx5_ct_entry *
783 mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
784 {
785 	struct mlx5_ct_entry *entry;
786 
787 	entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
788 				       tuples_ht_params);
789 	if (entry && mlx5_tc_ct_entry_valid(entry) &&
790 	    refcount_inc_not_zero(&entry->refcnt)) {
791 		return entry;
792 	} else if (!entry) {
793 		entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
794 					       tuple, tuples_nat_ht_params);
795 		if (entry && mlx5_tc_ct_entry_valid(entry) &&
796 		    refcount_inc_not_zero(&entry->refcnt))
797 			return entry;
798 	}
799 
800 	return entry ? ERR_PTR(-EINVAL) : NULL;
801 }
802 
803 static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
804 {
805 	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
806 
807 	rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
808 			       &entry->tuple_nat_node,
809 			       tuples_nat_ht_params);
810 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
811 			       tuples_ht_params);
812 }
813 
814 static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
815 {
816 	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
817 
818 	mlx5_tc_ct_entry_del_rules(ct_priv, entry);
819 
820 	spin_lock_bh(&ct_priv->ht_lock);
821 	mlx5_tc_ct_entry_remove_from_tuples(entry);
822 	spin_unlock_bh(&ct_priv->ht_lock);
823 
824 	mlx5_tc_ct_counter_put(ct_priv, entry);
825 	kfree(entry);
826 }
827 
828 static void
829 mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
830 {
831 	if (!refcount_dec_and_test(&entry->refcnt))
832 		return;
833 
834 	mlx5_tc_ct_entry_del(entry);
835 }
836 
837 static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
838 {
839 	struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
840 
841 	mlx5_tc_ct_entry_del(entry);
842 }
843 
844 static void
845 __mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
846 {
847 	struct mlx5e_priv *priv;
848 
849 	if (!refcount_dec_and_test(&entry->refcnt))
850 		return;
851 
852 	priv = netdev_priv(entry->ct_priv->netdev);
853 	INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
854 	queue_work(priv->wq, &entry->work);
855 }
856 
857 static struct mlx5_ct_counter *
858 mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
859 {
860 	struct mlx5_ct_counter *counter;
861 	int ret;
862 
863 	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
864 	if (!counter)
865 		return ERR_PTR(-ENOMEM);
866 
867 	counter->is_shared = false;
868 	counter->counter = mlx5_fc_create(ct_priv->dev, true);
869 	if (IS_ERR(counter->counter)) {
870 		ct_dbg("Failed to create counter for ct entry");
871 		ret = PTR_ERR(counter->counter);
872 		kfree(counter);
873 		return ERR_PTR(ret);
874 	}
875 
876 	return counter;
877 }
878 
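/* Both directions of a connection can share one counter: reuse the reverse
 * tuple's counter if that entry exists, otherwise create a new shared one.
 */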
879 static struct mlx5_ct_counter *
880 mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
881 			      struct mlx5_ct_entry *entry)
882 {
883 	struct mlx5_ct_tuple rev_tuple = entry->tuple;
884 	struct mlx5_ct_counter *shared_counter;
885 	struct mlx5_ct_entry *rev_entry;
886 	__be16 tmp_port;
887 	int ret;
888 
889 	/* get the reversed tuple */
890 	tmp_port = rev_tuple.port.src;
891 	rev_tuple.port.src = rev_tuple.port.dst;
892 	rev_tuple.port.dst = tmp_port;
893 
894 	if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
895 		__be32 tmp_addr = rev_tuple.ip.src_v4;
896 
897 		rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
898 		rev_tuple.ip.dst_v4 = tmp_addr;
899 	} else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
900 		struct in6_addr tmp_addr = rev_tuple.ip.src_v6;
901 
902 		rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
903 		rev_tuple.ip.dst_v6 = tmp_addr;
904 	} else {
905 		return ERR_PTR(-EOPNOTSUPP);
906 	}
907 
908 	/* Use the same counter as the reverse direction */
909 	spin_lock_bh(&ct_priv->ht_lock);
910 	rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
911 
912 	if (IS_ERR(rev_entry)) {
913 		spin_unlock_bh(&ct_priv->ht_lock);
914 		goto create_counter;
915 	}
916 
917 	if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
918 		ct_dbg("Using shared counter entry=0x%p rev=0x%p\n", entry, rev_entry);
919 		shared_counter = rev_entry->counter;
920 		spin_unlock_bh(&ct_priv->ht_lock);
921 
922 		mlx5_tc_ct_entry_put(rev_entry);
923 		return shared_counter;
924 	}
925 
926 	spin_unlock_bh(&ct_priv->ht_lock);
927 
928 create_counter:
929 
930 	shared_counter = mlx5_tc_ct_counter_create(ct_priv);
931 	if (IS_ERR(shared_counter)) {
932 		ret = PTR_ERR(shared_counter);
933 		return ERR_PTR(ret);
934 	}
935 
936 	shared_counter->is_shared = true;
937 	refcount_set(&shared_counter->refcount, 1);
938 	return shared_counter;
939 }
940 
941 static int
942 mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
943 			   struct flow_rule *flow_rule,
944 			   struct mlx5_ct_entry *entry,
945 			   u8 zone_restore_id)
946 {
947 	int err;
948 
949 	if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
950 		entry->counter = mlx5_tc_ct_counter_create(ct_priv);
951 	else
952 		entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
953 
954 	if (IS_ERR(entry->counter)) {
955 		err = PTR_ERR(entry->counter);
956 		return err;
957 	}
958 
959 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
960 					zone_restore_id);
961 	if (err)
962 		goto err_orig;
963 
964 	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
965 					zone_restore_id);
966 	if (err)
967 		goto err_nat;
968 
969 	return 0;
970 
971 err_nat:
972 	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
973 err_orig:
974 	mlx5_tc_ct_counter_put(ct_priv, entry);
975 	return err;
976 }
977 
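/* FLOW_CLS_REPLACE callback: allocate a ct entry for the conntrack rule,
 * hash it by cookie and by (NAT) tuple, then install the hardware rules.
 */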
978 static int
979 mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
980 				  struct flow_cls_offload *flow)
981 {
982 	struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
983 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
984 	struct flow_action_entry *meta_action;
985 	unsigned long cookie = flow->cookie;
986 	struct mlx5_ct_entry *entry;
987 	int err;
988 
989 	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
990 	if (!meta_action)
991 		return -EOPNOTSUPP;
992 
993 	spin_lock_bh(&ct_priv->ht_lock);
994 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
995 	if (entry && refcount_inc_not_zero(&entry->refcnt)) {
996 		spin_unlock_bh(&ct_priv->ht_lock);
997 		mlx5_tc_ct_entry_put(entry);
998 		return -EEXIST;
999 	}
1000 	spin_unlock_bh(&ct_priv->ht_lock);
1001 
1002 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1003 	if (!entry)
1004 		return -ENOMEM;
1005 
1006 	entry->tuple.zone = ft->zone;
1007 	entry->cookie = flow->cookie;
1008 	entry->restore_cookie = meta_action->ct_metadata.cookie;
1009 	refcount_set(&entry->refcnt, 2);
1010 	entry->ct_priv = ct_priv;
1011 
1012 	err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
1013 	if (err)
1014 		goto err_set;
1015 
1016 	memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
1017 	err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
1018 	if (err)
1019 		goto err_set;
1020 
1021 	spin_lock_bh(&ct_priv->ht_lock);
1022 
1023 	err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
1024 					    cts_ht_params);
1025 	if (err)
1026 		goto err_entries;
1027 
1028 	err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
1029 					    &entry->tuple_node,
1030 					    tuples_ht_params);
1031 	if (err)
1032 		goto err_tuple;
1033 
1034 	if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
1035 		err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
1036 						    &entry->tuple_nat_node,
1037 						    tuples_nat_ht_params);
1038 		if (err)
1039 			goto err_tuple_nat;
1040 	}
1041 	spin_unlock_bh(&ct_priv->ht_lock);
1042 
1043 	err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
1044 					 ft->zone_restore_id);
1045 	if (err)
1046 		goto err_rules;
1047 
1048 	set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
1049 	mlx5_tc_ct_entry_put(entry); /* this function reference */
1050 
1051 	return 0;
1052 
1053 err_rules:
1054 	spin_lock_bh(&ct_priv->ht_lock);
1055 	if (mlx5_tc_ct_entry_has_nat(entry))
1056 		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
1057 				       &entry->tuple_nat_node, tuples_nat_ht_params);
1058 err_tuple_nat:
1059 	rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
1060 			       &entry->tuple_node,
1061 			       tuples_ht_params);
1062 err_tuple:
1063 	rhashtable_remove_fast(&ft->ct_entries_ht,
1064 			       &entry->node,
1065 			       cts_ht_params);
1066 err_entries:
1067 	spin_unlock_bh(&ct_priv->ht_lock);
1068 err_set:
1069 	kfree(entry);
1070 	if (err != -EEXIST)
1071 		netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
1072 	return err;
1073 }
1074 
1075 static int
1076 mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
1077 				  struct flow_cls_offload *flow)
1078 {
1079 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1080 	unsigned long cookie = flow->cookie;
1081 	struct mlx5_ct_entry *entry;
1082 
1083 	spin_lock_bh(&ct_priv->ht_lock);
1084 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1085 	if (!entry) {
1086 		spin_unlock_bh(&ct_priv->ht_lock);
1087 		return -ENOENT;
1088 	}
1089 
1090 	if (!mlx5_tc_ct_entry_valid(entry)) {
1091 		spin_unlock_bh(&ct_priv->ht_lock);
1092 		return -EINVAL;
1093 	}
1094 
1095 	rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
1096 	mlx5_tc_ct_entry_remove_from_tuples(entry);
1097 	spin_unlock_bh(&ct_priv->ht_lock);
1098 
1099 	mlx5_tc_ct_entry_put(entry);
1100 
1101 	return 0;
1102 }
1103 
1104 static int
1105 mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
1106 				    struct flow_cls_offload *f)
1107 {
1108 	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
1109 	unsigned long cookie = f->cookie;
1110 	struct mlx5_ct_entry *entry;
1111 	u64 lastuse, packets, bytes;
1112 
1113 	spin_lock_bh(&ct_priv->ht_lock);
1114 	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
1115 	if (!entry) {
1116 		spin_unlock_bh(&ct_priv->ht_lock);
1117 		return -ENOENT;
1118 	}
1119 
1120 	if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
1121 		spin_unlock_bh(&ct_priv->ht_lock);
1122 		return -EINVAL;
1123 	}
1124 
1125 	spin_unlock_bh(&ct_priv->ht_lock);
1126 
1127 	mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
1128 	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
1129 			  FLOW_ACTION_HW_STATS_DELAYED);
1130 
1131 	mlx5_tc_ct_entry_put(entry);
1132 	return 0;
1133 }
1134 
1135 static int
1136 mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
1137 			      void *cb_priv)
1138 {
1139 	struct flow_cls_offload *f = type_data;
1140 	struct mlx5_ct_ft *ft = cb_priv;
1141 
1142 	if (type != TC_SETUP_CLSFLOWER)
1143 		return -EOPNOTSUPP;
1144 
1145 	switch (f->command) {
1146 	case FLOW_CLS_REPLACE:
1147 		return mlx5_tc_ct_block_flow_offload_add(ft, f);
1148 	case FLOW_CLS_DESTROY:
1149 		return mlx5_tc_ct_block_flow_offload_del(ft, f);
1150 	case FLOW_CLS_STATS:
1151 		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
1152 	default:
1153 		break;
1154 	}
1155 
1156 	return -EOPNOTSUPP;
1157 }
1158 
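/* Dissect an skb into a ct tuple for the given zone. Returns false for
 * anything other than TCP/UDP over IPv4/IPv6.
 */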
1159 static bool
1160 mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
1161 			u16 zone)
1162 {
1163 	struct flow_keys flow_keys;
1164 
1165 	skb_reset_network_header(skb);
1166 	skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
1167 
1168 	tuple->zone = zone;
1169 
1170 	if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
1171 	    flow_keys.basic.ip_proto != IPPROTO_UDP)
1172 		return false;
1173 
1174 	tuple->port.src = flow_keys.ports.src;
1175 	tuple->port.dst = flow_keys.ports.dst;
1176 	tuple->n_proto = flow_keys.basic.n_proto;
1177 	tuple->ip_proto = flow_keys.basic.ip_proto;
1178 
1179 	switch (flow_keys.basic.n_proto) {
1180 	case htons(ETH_P_IP):
1181 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
1182 		tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
1183 		tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
1184 		break;
1185 
1186 	case htons(ETH_P_IPV6):
1187 		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1188 		tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
1189 		tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
1190 		break;
1191 	default:
1192 		goto out;
1193 	}
1194 
1195 	return true;
1196 
1197 out:
1198 	return false;
1199 }
1200 
1201 int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
1202 {
1203 	u32 ctstate = 0, ctstate_mask = 0;
1204 
1205 	mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
1206 					&ctstate, &ctstate_mask);
1207 
1208 	if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
1209 		return -EOPNOTSUPP;
1210 
1211 	ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
1212 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1213 				    ctstate, ctstate_mask);
1214 
1215 	return 0;
1216 }
1217 
1218 void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
1219 {
1220 	if (!priv || !ct_attr->ct_labels_id)
1221 		return;
1222 
1223 	mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
1224 }
1225 
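/* Translate a tc ct_state/zone/mark/labels match into matches on the mapped
 * registers. Only trk and est (and their negations) are supported; matching
 * on +new is rejected.
 */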
1226 int
1227 mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
1228 		     struct mlx5_flow_spec *spec,
1229 		     struct flow_cls_offload *f,
1230 		     struct mlx5_ct_attr *ct_attr,
1231 		     struct netlink_ext_ack *extack)
1232 {
1233 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1234 	struct flow_dissector_key_ct *mask, *key;
1235 	bool trk, est, untrk, unest, new;
1236 	u32 ctstate = 0, ctstate_mask = 0;
1237 	u16 ct_state_on, ct_state_off;
1238 	u16 ct_state, ct_state_mask;
1239 	struct flow_match_ct match;
1240 	u32 ct_labels[4];
1241 
1242 	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
1243 		return 0;
1244 
1245 	if (!priv) {
1246 		NL_SET_ERR_MSG_MOD(extack,
1247 				   "offload of ct matching isn't available");
1248 		return -EOPNOTSUPP;
1249 	}
1250 
1251 	flow_rule_match_ct(rule, &match);
1252 
1253 	key = match.key;
1254 	mask = match.mask;
1255 
1256 	ct_state = key->ct_state;
1257 	ct_state_mask = mask->ct_state;
1258 
1259 	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
1260 			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
1261 			      TCA_FLOWER_KEY_CT_FLAGS_NEW)) {
1262 		NL_SET_ERR_MSG_MOD(extack,
1263 				   "only ct_state trk, est and new are supported for offload");
1264 		return -EOPNOTSUPP;
1265 	}
1266 
1267 	ct_state_on = ct_state & ct_state_mask;
1268 	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
1269 	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1270 	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
1271 	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1272 	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
1273 	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
1274 
1275 	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
1276 	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1277 	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
1278 	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
1279 
1280 	if (new) {
1281 		NL_SET_ERR_MSG_MOD(extack,
1282 				   "matching on ct_state +new isn't supported");
1283 		return -EOPNOTSUPP;
1284 	}
1285 
1286 	if (mask->ct_zone)
1287 		mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1288 					    key->ct_zone, MLX5_CT_ZONE_MASK);
1289 	if (ctstate_mask)
1290 		mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
1291 					    ctstate, ctstate_mask);
1292 	if (mask->ct_mark)
1293 		mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
1294 					    key->ct_mark, mask->ct_mark);
1295 	if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
1296 	    mask->ct_labels[3]) {
1297 		ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
1298 		ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
1299 		ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
1300 		ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
1301 		if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
1302 			return -EOPNOTSUPP;
1303 		mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
1304 					    MLX5_CT_LABELS_MASK);
1305 	}
1306 
1307 	return 0;
1308 }
1309 
1310 int
1311 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
1312 			struct mlx5_flow_attr *attr,
1313 			const struct flow_action_entry *act,
1314 			struct netlink_ext_ack *extack)
1315 {
1316 	if (!priv) {
1317 		NL_SET_ERR_MSG_MOD(extack,
1318 				   "offload of ct action isn't available");
1319 		return -EOPNOTSUPP;
1320 	}
1321 
1322 	attr->ct_attr.zone = act->ct.zone;
1323 	attr->ct_attr.ct_action = act->ct.action;
1324 	attr->ct_attr.nf_ft = act->ct.flow_table;
1325 
1326 	return 0;
1327 }
1328 
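/* Install the two pre_ct rules: packets already tracked in this zone (and
 * NATed, for the NAT table) skip straight to post_ct, everything else
 * misses into the ct (or ct_nat) table.
 */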
1329 static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
1330 				  struct mlx5_tc_ct_pre *pre_ct,
1331 				  bool nat)
1332 {
1333 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1334 	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1335 	struct mlx5_core_dev *dev = ct_priv->dev;
1336 	struct mlx5_flow_table *ft = pre_ct->ft;
1337 	struct mlx5_flow_destination dest = {};
1338 	struct mlx5_flow_act flow_act = {};
1339 	struct mlx5_modify_hdr *mod_hdr;
1340 	struct mlx5_flow_handle *rule;
1341 	struct mlx5_flow_spec *spec;
1342 	u32 ctstate;
1343 	u16 zone;
1344 	int err;
1345 
1346 	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1347 	if (!spec)
1348 		return -ENOMEM;
1349 
1350 	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
1351 	err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
1352 					ZONE_TO_REG, zone);
1353 	if (err) {
1354 		ct_dbg("Failed to set zone register mapping");
1355 		goto err_mapping;
1356 	}
1357 
1358 	mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
1359 					   pre_mod_acts.num_actions,
1360 					   pre_mod_acts.actions);
1361 
1362 	if (IS_ERR(mod_hdr)) {
1363 		err = PTR_ERR(mod_hdr);
1364 		ct_dbg("Failed to create pre ct mod hdr");
1365 		goto err_mapping;
1366 	}
1367 	pre_ct->modify_hdr = mod_hdr;
1368 
1369 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1370 			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1371 	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1372 	flow_act.modify_hdr = mod_hdr;
1373 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1374 
1375 	/* add flow rule */
1376 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1377 				    zone, MLX5_CT_ZONE_MASK);
1378 	ctstate = MLX5_CT_STATE_TRK_BIT;
1379 	if (nat)
1380 		ctstate |= MLX5_CT_STATE_NAT_BIT;
1381 	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
1382 
1383 	dest.ft = ct_priv->post_ct;
1384 	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1385 	if (IS_ERR(rule)) {
1386 		err = PTR_ERR(rule);
1387 		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
1388 		goto err_flow_rule;
1389 	}
1390 	pre_ct->flow_rule = rule;
1391 
1392 	/* add miss rule */
1393 	memset(spec, 0, sizeof(*spec));
1394 	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
1395 	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1396 	if (IS_ERR(rule)) {
1397 		err = PTR_ERR(rule);
1398 		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
1399 		goto err_miss_rule;
1400 	}
1401 	pre_ct->miss_rule = rule;
1402 
1403 	dealloc_mod_hdr_actions(&pre_mod_acts);
1404 	kvfree(spec);
1405 	return 0;
1406 
1407 err_miss_rule:
1408 	mlx5_del_flow_rules(pre_ct->flow_rule);
1409 err_flow_rule:
1410 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1411 err_mapping:
1412 	dealloc_mod_hdr_actions(&pre_mod_acts);
1413 	kvfree(spec);
1414 	return err;
1415 }
1416 
1417 static void
1418 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1419 		       struct mlx5_tc_ct_pre *pre_ct)
1420 {
1421 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1422 	struct mlx5_core_dev *dev = ct_priv->dev;
1423 
1424 	mlx5_del_flow_rules(pre_ct->flow_rule);
1425 	mlx5_del_flow_rules(pre_ct->miss_rule);
1426 	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1427 }
1428 
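/* Create the per-zone pre_ct flow table: two FTEs, one group matching the
 * zone and ct state bits in metadata reg_c_2 and one catch-all miss group.
 */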
1429 static int
1430 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
1431 			struct mlx5_tc_ct_pre *pre_ct,
1432 			bool nat)
1433 {
1434 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1435 	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1436 	struct mlx5_core_dev *dev = ct_priv->dev;
1437 	struct mlx5_flow_table_attr ft_attr = {};
1438 	struct mlx5_flow_namespace *ns;
1439 	struct mlx5_flow_table *ft;
1440 	struct mlx5_flow_group *g;
1441 	u32 metadata_reg_c_2_mask;
1442 	u32 *flow_group_in;
1443 	void *misc;
1444 	int err;
1445 
1446 	ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
1447 	if (!ns) {
1448 		err = -EOPNOTSUPP;
1449 		ct_dbg("Failed to get flow namespace");
1450 		return err;
1451 	}
1452 
1453 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1454 	if (!flow_group_in)
1455 		return -ENOMEM;
1456 
1457 	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
1458 	ft_attr.prio =  ct_priv->ns_type ==  MLX5_FLOW_NAMESPACE_FDB ?
1459 			FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
1460 	ft_attr.max_fte = 2;
1461 	ft_attr.level = 1;
1462 	ft = mlx5_create_flow_table(ns, &ft_attr);
1463 	if (IS_ERR(ft)) {
1464 		err = PTR_ERR(ft);
1465 		ct_dbg("Failed to create pre ct table");
1466 		goto out_free;
1467 	}
1468 	pre_ct->ft = ft;
1469 
1470 	/* create flow group */
1471 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1472 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
1473 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1474 		 MLX5_MATCH_MISC_PARAMETERS_2);
1475 
1476 	misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1477 			    match_criteria.misc_parameters_2);
1478 
1479 	metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
1480 	metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
1481 	if (nat)
1482 		metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
1483 
1484 	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
1485 		 metadata_reg_c_2_mask);
1486 
1487 	g = mlx5_create_flow_group(ft, flow_group_in);
1488 	if (IS_ERR(g)) {
1489 		err = PTR_ERR(g);
1490 		ct_dbg("Failed to create pre ct group");
1491 		goto err_flow_grp;
1492 	}
1493 	pre_ct->flow_grp = g;
1494 
1495 	/* create miss group */
1496 	memset(flow_group_in, 0, inlen);
1497 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
1498 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
1499 	g = mlx5_create_flow_group(ft, flow_group_in);
1500 	if (IS_ERR(g)) {
1501 		err = PTR_ERR(g);
1502 		ct_dbg("Failed to create pre ct miss group");
1503 		goto err_miss_grp;
1504 	}
1505 	pre_ct->miss_grp = g;
1506 
1507 	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
1508 	if (err)
1509 		goto err_add_rules;
1510 
1511 	kvfree(flow_group_in);
1512 	return 0;
1513 
1514 err_add_rules:
1515 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1516 err_miss_grp:
1517 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1518 err_flow_grp:
1519 	mlx5_destroy_flow_table(ft);
1520 out_free:
1521 	kvfree(flow_group_in);
1522 	return err;
1523 }
1524 
1525 static void
1526 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1527 		       struct mlx5_tc_ct_pre *pre_ct)
1528 {
1529 	tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1530 	mlx5_destroy_flow_group(pre_ct->miss_grp);
1531 	mlx5_destroy_flow_group(pre_ct->flow_grp);
1532 	mlx5_destroy_flow_table(pre_ct->ft);
1533 }
1534 
1535 static int
1536 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1537 {
1538 	int err;
1539 
1540 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
1541 	if (err)
1542 		return err;
1543 
1544 	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
1545 	if (err)
1546 		goto err_pre_ct_nat;
1547 
1548 	return 0;
1549 
1550 err_pre_ct_nat:
1551 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1552 	return err;
1553 }
1554 
1555 static void
1556 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1557 {
1558 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
1559 	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1560 }
1561 
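/* Get or create the per-zone ct flow table context: allocate the
 * pre_ct/pre_ct_nat tables and the entries hashtable, and register the
 * flow table offload callback with conntrack.
 */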
1562 static struct mlx5_ct_ft *
1563 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1564 		     struct nf_flowtable *nf_ft)
1565 {
1566 	struct mlx5_ct_ft *ft;
1567 	int err;
1568 
1569 	ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1570 	if (ft) {
1571 		refcount_inc(&ft->refcount);
1572 		return ft;
1573 	}
1574 
1575 	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1576 	if (!ft)
1577 		return ERR_PTR(-ENOMEM);
1578 
1579 	err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1580 	if (err)
1581 		goto err_mapping;
1582 
1583 	ft->zone = zone;
1584 	ft->nf_ft = nf_ft;
1585 	ft->ct_priv = ct_priv;
1586 	refcount_set(&ft->refcount, 1);
1587 
1588 	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1589 	if (err)
1590 		goto err_alloc_pre_ct;
1591 
1592 	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1593 	if (err)
1594 		goto err_init;
1595 
1596 	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1597 				     zone_params);
1598 	if (err)
1599 		goto err_insert;
1600 
1601 	err = nf_flow_table_offload_add_cb(ft->nf_ft,
1602 					   mlx5_tc_ct_block_flow_offload, ft);
1603 	if (err)
1604 		goto err_add_cb;
1605 
1606 	return ft;
1607 
1608 err_add_cb:
1609 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1610 err_insert:
1611 	rhashtable_destroy(&ft->ct_entries_ht);
1612 err_init:
1613 	mlx5_tc_ct_free_pre_ct_tables(ft);
1614 err_alloc_pre_ct:
1615 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1616 err_mapping:
1617 	kfree(ft);
1618 	return ERR_PTR(err);
1619 }
1620 
1621 static void
1622 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1623 {
1624 	struct mlx5_ct_entry *entry = ptr;
1625 
1626 	mlx5_tc_ct_entry_put(entry);
1627 }
1628 
1629 static void
1630 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1631 {
1632 	struct mlx5e_priv *priv;
1633 
1634 	if (!refcount_dec_and_test(&ft->refcount))
1635 		return;
1636 
1637 	nf_flow_table_offload_del_cb(ft->nf_ft,
1638 				     mlx5_tc_ct_block_flow_offload, ft);
1639 	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1640 	rhashtable_free_and_destroy(&ft->ct_entries_ht,
1641 				    mlx5_tc_ct_flush_ft_entry,
1642 				    ct_priv);
1643 	priv = netdev_priv(ct_priv->netdev);
1644 	flush_workqueue(priv->wq);
1645 	mlx5_tc_ct_free_pre_ct_tables(ft);
1646 	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1647 	kfree(ft);
1648 }
1649 
1650 /* We translate the tc filter with CT action to the following HW model:
1651  *
1652  * +---------------------+
1653  * + ft prio (tc chain) +
1654  * + original match      +
1655  * +---------------------+
1656  *      | set chain miss mapping
1657  *      | set fte_id
1658  *      | set tunnel_id
1659  *      | do decap
1660  *      v
1661  * +---------------------+
1662  * + pre_ct/pre_ct_nat   +  if matches     +---------------------+
1663  * + zone+nat match      +---------------->+ post_ct (see below) +
1664  * +---------------------+  set zone       +---------------------+
1665  *      | set zone
1666  *      v
1667  * +--------------------+
1668  * + CT (nat or no nat) +
1669  * + tuple + zone match +
1670  * +--------------------+
1671  *      | set mark
1672  *      | set labels_id
1673  *      | set established
1674  *      | set zone_restore
1675  *      | do nat (if needed)
1676  *      v
1677  * +--------------+
1678  * + post_ct      + original filter actions
1679  * + fte_id match +------------------------>
1680  * +--------------+
1681  */
1682 static struct mlx5_flow_handle *
1683 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
1684 			  struct mlx5e_tc_flow *flow,
1685 			  struct mlx5_flow_spec *orig_spec,
1686 			  struct mlx5_flow_attr *attr)
1687 {
1688 	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
1689 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1690 	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1691 	u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1692 	struct mlx5_flow_spec *post_ct_spec = NULL;
1693 	struct mlx5_flow_attr *pre_ct_attr;
1694 	struct mlx5_modify_hdr *mod_hdr;
1695 	struct mlx5_flow_handle *rule;
1696 	struct mlx5_ct_flow *ct_flow;
1697 	int chain_mapping = 0, err;
1698 	struct mlx5_ct_ft *ft;
1699 	u32 fte_id = 1;
1700 
1701 	post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
1702 	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1703 	if (!post_ct_spec || !ct_flow) {
1704 		kfree(post_ct_spec);
1705 		kfree(ct_flow);
1706 		return ERR_PTR(-ENOMEM);
1707 	}
1708 
1709 	/* Register for CT established events */
1710 	ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
1711 				  attr->ct_attr.nf_ft);
1712 	if (IS_ERR(ft)) {
1713 		err = PTR_ERR(ft);
1714 		ct_dbg("Failed to register to ft callback");
1715 		goto err_ft;
1716 	}
1717 	ct_flow->ft = ft;
1718 
1719 	err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
1720 			    MLX5_FTE_ID_MAX, GFP_KERNEL);
1721 	if (err) {
1722 		netdev_warn(priv->netdev,
1723 			    "Failed to allocate fte id, err: %d\n", err);
1724 		goto err_idr;
1725 	}
1726 	ct_flow->fte_id = fte_id;
1727 
1728 	/* Base flow attributes of both rules on original rule attribute */
1729 	ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1730 	if (!ct_flow->pre_ct_attr) {
1731 		err = -ENOMEM;
1732 		goto err_alloc_pre;
1733 	}
1734 
1735 	ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1736 	if (!ct_flow->post_ct_attr) {
1737 		err = -ENOMEM;
1738 		goto err_alloc_post;
1739 	}
1740 
1741 	pre_ct_attr = ct_flow->pre_ct_attr;
1742 	memcpy(pre_ct_attr, attr, attr_sz);
1743 	memcpy(ct_flow->post_ct_attr, attr, attr_sz);
1744 
1745 	/* Modify the original rule's action to fwd and modify, leave decap */
1746 	pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
1747 	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1748 			       MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1749 
1750 	/* Write the chain miss tag for misses in the ct table, as we
1751 	 * don't go through all prios of this chain the way a normal
1752 	 * tc rule miss would.
1753 	 */
1754 	err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
1755 					    &chain_mapping);
1756 	if (err) {
1757 		ct_dbg("Failed to get chain register mapping for chain");
1758 		goto err_get_chain;
1759 	}
1760 	ct_flow->chain_mapping = chain_mapping;
1761 
1762 	err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1763 					CHAIN_TO_REG, chain_mapping);
1764 	if (err) {
1765 		ct_dbg("Failed to set chain register mapping");
1766 		goto err_mapping;
1767 	}
1768 
1769 	err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1770 					FTEID_TO_REG, fte_id);
1771 	if (err) {
1772 		ct_dbg("Failed to set fte_id register mapping");
1773 		goto err_mapping;
1774 	}
1775 
1776 	/* If the original flow does decap, it happens before entering the
1777 	 * ct table, so add a rewrite for the tunnel match_id.
1778 	 */
1779 	if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
1780 	    attr->chain == 0) {
1781 		u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
1782 
1783 		err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
1784 						ct_priv->ns_type,
1785 						TUNNEL_TO_REG,
1786 						tun_id);
1787 		if (err) {
1788 			ct_dbg("Failed to set tunnel register mapping");
1789 			goto err_mapping;
1790 		}
1791 	}
1792 
1793 	mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1794 					   pre_mod_acts.num_actions,
1795 					   pre_mod_acts.actions);
1796 	if (IS_ERR(mod_hdr)) {
1797 		err = PTR_ERR(mod_hdr);
1798 		ct_dbg("Failed to create pre ct mod hdr");
1799 		goto err_mapping;
1800 	}
1801 	pre_ct_attr->modify_hdr = mod_hdr;
1802 
1803 	/* The post ct rule matches on fte_id and executes the original
1804 	 * rule's tc actions.
1805 	 */
1806 	mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
1807 				    fte_id, MLX5_FTE_ID_MASK);
1808 
1809 	/* Put post_ct rule on post_ct flow table */
1810 	ct_flow->post_ct_attr->chain = 0;
1811 	ct_flow->post_ct_attr->prio = 0;
1812 	ct_flow->post_ct_attr->ft = ct_priv->post_ct;
1813 
1814 	ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
1815 	ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
1816 	ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
1817 	rule = mlx5_tc_rule_insert(priv, post_ct_spec,
1818 				   ct_flow->post_ct_attr);
1819 	ct_flow->post_ct_rule = rule;
1820 	if (IS_ERR(ct_flow->post_ct_rule)) {
1821 		err = PTR_ERR(ct_flow->post_ct_rule);
1822 		ct_dbg("Failed to add post ct rule");
1823 		goto err_insert_post_ct;
1824 	}
1825 
1826 	/* Change the original rule to point to the ct table */
1827 	pre_ct_attr->dest_chain = 0;
1828 	pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
1829 	ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
1830 						   pre_ct_attr);
1831 	if (IS_ERR(ct_flow->pre_ct_rule)) {
1832 		err = PTR_ERR(ct_flow->pre_ct_rule);
1833 		ct_dbg("Failed to add pre ct rule");
1834 		goto err_insert_orig;
1835 	}
1836 
1837 	attr->ct_attr.ct_flow = ct_flow;
1838 	dealloc_mod_hdr_actions(&pre_mod_acts);
1839 	kfree(post_ct_spec);
1840 
1841 	return rule;
1842 
1843 err_insert_orig:
1844 	mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
1845 			    ct_flow->post_ct_attr);
1846 err_insert_post_ct:
1847 	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1848 err_mapping:
1849 	dealloc_mod_hdr_actions(&pre_mod_acts);
1850 	mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1851 err_get_chain:
1852 	kfree(ct_flow->post_ct_attr);
1853 err_alloc_post:
1854 	kfree(ct_flow->pre_ct_attr);
1855 err_alloc_pre:
1856 	idr_remove(&ct_priv->fte_ids, fte_id);
1857 err_idr:
1858 	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
1859 err_ft:
1860 	kfree(post_ct_spec);
1861 	kfree(ct_flow);
1862 	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
1863 	return ERR_PTR(err);
1864 }
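/* Illustrative sketch (not part of the driver, compiled out): the glue in
 * the HW model above between the pre_ct and post_ct rules is the fte_id
 * register. The hypothetical helper below merely regroups the two calls
 * that __mlx5_tc_ct_flow_offload() already makes: write fte_id via a
 * mod-header action on the pre_ct side, then match the same value in the
 * post_ct rule spec.
 */
#if 0
static int
mlx5_tc_ct_sketch_link_fte_id(struct mlx5e_priv *priv,
			      struct mlx5_tc_ct_priv *ct_priv,
			      struct mlx5e_tc_mod_hdr_acts *pre_mod_acts,
			      struct mlx5_flow_spec *post_ct_spec,
			      u32 fte_id)
{
	int err;

	/* pre_ct side: rewrite the fte_id register before jumping to ct */
	err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts,
					ct_priv->ns_type, FTEID_TO_REG,
					fte_id);
	if (err)
		return err;

	/* post_ct side: match the same fte_id to resume the original actions */
	mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
				    fte_id, MLX5_FTE_ID_MASK);
	return 0;
}
#endif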
1865 
1866 static struct mlx5_flow_handle *
1867 __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
1868 				struct mlx5_flow_spec *orig_spec,
1869 				struct mlx5_flow_attr *attr,
1870 				struct mlx5e_tc_mod_hdr_acts *mod_acts)
1871 {
1872 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1873 	u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1874 	struct mlx5_flow_attr *pre_ct_attr;
1875 	struct mlx5_modify_hdr *mod_hdr;
1876 	struct mlx5_flow_handle *rule;
1877 	struct mlx5_ct_flow *ct_flow;
1878 	int err;
1879 
1880 	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1881 	if (!ct_flow)
1882 		return ERR_PTR(-ENOMEM);
1883 
1884 	/* Base flow attributes on the original rule attribute */
1885 	pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1886 	if (!pre_ct_attr) {
1887 		err = -ENOMEM;
1888 		goto err_attr;
1889 	}
1890 
1891 	memcpy(pre_ct_attr, attr, attr_sz);
1892 
1893 	err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
1894 	if (err) {
1895 		ct_dbg("Failed to set register for ct clear");
1896 		goto err_set_registers;
1897 	}
1898 
1899 	mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1900 					   mod_acts->num_actions,
1901 					   mod_acts->actions);
1902 	if (IS_ERR(mod_hdr)) {
1903 		err = PTR_ERR(mod_hdr);
1904 		ct_dbg("Failed to create ct clear mod hdr");
1905 		goto err_set_registers;
1906 	}
1907 
1908 	dealloc_mod_hdr_actions(mod_acts);
1909 	pre_ct_attr->modify_hdr = mod_hdr;
1910 	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1911 
1912 	rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
1913 	if (IS_ERR(rule)) {
1914 		err = PTR_ERR(rule);
1915 		ct_dbg("Failed to add ct clear rule");
1916 		goto err_insert;
1917 	}
1918 
1919 	attr->ct_attr.ct_flow = ct_flow;
1920 	ct_flow->pre_ct_attr = pre_ct_attr;
1921 	ct_flow->pre_ct_rule = rule;
1922 	return rule;
1923 
1924 err_insert:
1925 	mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
1926 err_set_registers:
1927 	netdev_warn(priv->netdev,
1928 		    "Failed to offload ct clear flow, err %d\n", err);
1929 	kfree(pre_ct_attr);
1930 err_attr:
1931 	kfree(ct_flow);
1932 
1933 	return ERR_PTR(err);
1934 }
1935 
1936 struct mlx5_flow_handle *
1937 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
1938 			struct mlx5e_tc_flow *flow,
1939 			struct mlx5_flow_spec *spec,
1940 			struct mlx5_flow_attr *attr,
1941 			struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
1942 {
1943 	bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
1944 	struct mlx5_flow_handle *rule;
1945 
1946 	if (!priv)
1947 		return ERR_PTR(-EOPNOTSUPP);
1948 
1949 	mutex_lock(&priv->control_lock);
1950 
1951 	if (clear_action)
1952 		rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
1953 	else
1954 		rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
1955 	mutex_unlock(&priv->control_lock);
1956 
1957 	return rule;
1958 }
1959 
1960 static void
1961 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
1962 			 struct mlx5e_tc_flow *flow,
1963 			 struct mlx5_ct_flow *ct_flow)
1964 {
1965 	struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
1966 	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1967 
1968 	mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
1969 			    pre_ct_attr);
1970 	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1971 
1972 	if (ct_flow->post_ct_rule) {
1973 		mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
1974 				    ct_flow->post_ct_attr);
1975 		mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1976 		idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
1977 		mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
1978 	}
1979 
1980 	kfree(ct_flow->pre_ct_attr);
1981 	kfree(ct_flow->post_ct_attr);
1982 	kfree(ct_flow);
1983 }
1984 
1985 void
1986 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
1987 		       struct mlx5e_tc_flow *flow,
1988 		       struct mlx5_flow_attr *attr)
1989 {
1990 	struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
1991 
1992 	/* We are also called on error to clean up after a failed parse,
1993 	 * in which case there is nothing to delete yet.
1994 	 */
1995 	if (!ct_flow)
1996 		return;
1997 
1998 	mutex_lock(&priv->control_lock);
1999 	__mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
2000 	mutex_unlock(&priv->control_lock);
2001 }
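/* Illustrative usage sketch (not part of the driver, compiled out): a
 * hypothetical caller is expected to pair mlx5_tc_ct_flow_offload() with
 * mlx5_tc_ct_delete_flow() on the same attr; the surrounding variables
 * below are assumed to exist in that caller.
 */
#if 0
	struct mlx5_flow_handle *rule;

	rule = mlx5_tc_ct_flow_offload(ct_priv, flow, spec, attr, mod_hdr_acts);
	if (IS_ERR(rule))
		return PTR_ERR(rule);

	/* ... later, on teardown (or after a parse error, where it is a no-op) ... */
	mlx5_tc_ct_delete_flow(ct_priv, flow, attr);
#endif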
2002 
2003 static int
2004 mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
2005 				  const char **err_msg)
2006 {
2007 	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
2008 		*err_msg = "firmware level support is missing";
2009 		return -EOPNOTSUPP;
2010 	}
2011 
2012 	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
2013 		/* The vlan workaround should be avoided for multi-chain rules.
2014 		 * This is just a sanity check, as the pop vlan action should
2015 		 * be supported by any FW that supports ignore_flow_level.
2016 		 */
2017 
2018 		*err_msg = "firmware vlan actions support is missing";
2019 		return -EOPNOTSUPP;
2020 	}
2021 
2022 	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
2023 				    fdb_modify_header_fwd_to_table)) {
2024 		/* CT always writes to registers using mod header actions.
2025 		 * Therefore, mod header combined with goto is required.
2026 		 */
2027 
2028 		*err_msg = "firmware fwd and modify support is missing";
2029 		return -EOPNOTSUPP;
2030 	}
2031 
2032 	if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2033 		*err_msg = "register loopback isn't supported";
2034 		return -EOPNOTSUPP;
2035 	}
2036 
2037 	return 0;
2038 }
2039 
2040 static int
2041 mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv,
2042 				  const char **err_msg)
2043 {
2044 	if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
2045 		*err_msg = "firmware level support is missing";
2046 		return -EOPNOTSUPP;
2047 	}
2048 
2049 	return 0;
2050 }
2051 
2052 static int
2053 mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
2054 			      enum mlx5_flow_namespace_type ns_type,
2055 			      const char **err_msg)
2056 {
2057 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2058 
2059 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
2060 	/* cannot restore chain ID on HW miss */
2061 
2062 	*err_msg = "tc skb extension missing";
2063 	return -EOPNOTSUPP;
2064 #endif
2065 	if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
2066 		return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
2067 	else
2068 		return mlx5_tc_ct_init_check_nic_support(priv, err_msg);
2069 }
2070 
2071 #define INIT_ERR_PREFIX "tc ct offload init failed"
2072 
2073 struct mlx5_tc_ct_priv *
2074 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
2075 		struct mod_hdr_tbl *mod_hdr,
2076 		enum mlx5_flow_namespace_type ns_type)
2077 {
2078 	struct mlx5_tc_ct_priv *ct_priv;
2079 	struct mlx5_core_dev *dev;
2080 	const char *msg;
2081 	int err;
2082 
2083 	dev = priv->mdev;
2084 	err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg);
2085 	if (err) {
2086 		mlx5_core_warn(dev,
2087 			       "tc ct offload not supported, %s\n",
2088 			       msg);
2089 		goto err_support;
2090 	}
2091 
2092 	ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
2093 	if (!ct_priv)
2094 		goto err_alloc;
2095 
2096 	ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
2097 	if (IS_ERR(ct_priv->zone_mapping)) {
2098 		err = PTR_ERR(ct_priv->zone_mapping);
2099 		goto err_mapping_zone;
2100 	}
2101 
2102 	ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
2103 	if (IS_ERR(ct_priv->labels_mapping)) {
2104 		err = PTR_ERR(ct_priv->labels_mapping);
2105 		goto err_mapping_labels;
2106 	}
2107 
2108 	spin_lock_init(&ct_priv->ht_lock);
2109 	ct_priv->ns_type = ns_type;
2110 	ct_priv->chains = chains;
2111 	ct_priv->netdev = priv->netdev;
2112 	ct_priv->dev = priv->mdev;
2113 	ct_priv->mod_hdr_tbl = mod_hdr;
2114 	ct_priv->ct = mlx5_chains_create_global_table(chains);
2115 	if (IS_ERR(ct_priv->ct)) {
2116 		err = PTR_ERR(ct_priv->ct);
2117 		mlx5_core_warn(dev,
2118 			       "%s, failed to create ct table err: %d\n",
2119 			       INIT_ERR_PREFIX, err);
2120 		goto err_ct_tbl;
2121 	}
2122 
2123 	ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
2124 	if (IS_ERR(ct_priv->ct_nat)) {
2125 		err = PTR_ERR(ct_priv->ct_nat);
2126 		mlx5_core_warn(dev,
2127 			       "%s, failed to create ct nat table err: %d\n",
2128 			       INIT_ERR_PREFIX, err);
2129 		goto err_ct_nat_tbl;
2130 	}
2131 
2132 	ct_priv->post_ct = mlx5_chains_create_global_table(chains);
2133 	if (IS_ERR(ct_priv->post_ct)) {
2134 		err = PTR_ERR(ct_priv->post_ct);
2135 		mlx5_core_warn(dev,
2136 			       "%s, failed to create post ct table err: %d\n",
2137 			       INIT_ERR_PREFIX, err);
2138 		goto err_post_ct_tbl;
2139 	}
2140 
2141 	idr_init(&ct_priv->fte_ids);
2142 	mutex_init(&ct_priv->control_lock);
2143 	rhashtable_init(&ct_priv->zone_ht, &zone_params);
2144 	rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
2145 	rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);
2146 
2147 	return ct_priv;
2148 
2149 err_post_ct_tbl:
2150 	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2151 err_ct_nat_tbl:
2152 	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2153 err_ct_tbl:
2154 	mapping_destroy(ct_priv->labels_mapping);
2155 err_mapping_labels:
2156 	mapping_destroy(ct_priv->zone_mapping);
2157 err_mapping_zone:
2158 	kfree(ct_priv);
2159 err_alloc:
2160 err_support:
2161 
2162 	return NULL;
2163 }
2164 
2165 void
2166 mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
2167 {
2168 	struct mlx5_fs_chains *chains;
2169 
2170 	if (!ct_priv)
2171 		return;
2172 
2173 	chains = ct_priv->chains;
2174 
2175 	mlx5_chains_destroy_global_table(chains, ct_priv->post_ct);
2176 	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2177 	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2178 	mapping_destroy(ct_priv->zone_mapping);
2179 	mapping_destroy(ct_priv->labels_mapping);
2180 
2181 	rhashtable_destroy(&ct_priv->ct_tuples_ht);
2182 	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2183 	rhashtable_destroy(&ct_priv->zone_ht);
2184 	mutex_destroy(&ct_priv->control_lock);
2185 	idr_destroy(&ct_priv->fte_ids);
2186 	kfree(ct_priv);
2187 }
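/* Illustrative lifecycle sketch (not part of the driver, compiled out):
 * mlx5_tc_ct_init() returns NULL when CT offload is unsupported, and a
 * successful init is paired with mlx5_tc_ct_clean(); the surrounding
 * variables below are assumed to exist in the caller.
 */
#if 0
	struct mlx5_tc_ct_priv *ct_priv;

	ct_priv = mlx5_tc_ct_init(priv, chains, mod_hdr_tbl,
				  MLX5_FLOW_NAMESPACE_FDB);
	if (!ct_priv)
		return;	/* continue without CT offload */

	/* ... offload CT flows via mlx5_tc_ct_flow_offload() ... */

	mlx5_tc_ct_clean(ct_priv);
#endif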
2188 
2189 bool
2190 mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
2191 			 struct sk_buff *skb, u8 zone_restore_id)
2192 {
2193 	struct mlx5_ct_tuple tuple = {};
2194 	struct mlx5_ct_entry *entry;
2195 	u16 zone;
2196 
2197 	if (!ct_priv || !zone_restore_id)
2198 		return true;
2199 
2200 	if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
2201 		return false;
2202 
2203 	if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
2204 		return false;
2205 
2206 	spin_lock(&ct_priv->ht_lock);
2207 
2208 	entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
2209 	if (!entry) {
2210 		spin_unlock(&ct_priv->ht_lock);
2211 		return false;
2212 	}
2213 
2214 	if (IS_ERR(entry)) {
2215 		spin_unlock(&ct_priv->ht_lock);
2216 		return false;
2217 	}
2218 	spin_unlock(&ct_priv->ht_lock);
2219 
2220 	tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
2221 	__mlx5_tc_ct_entry_put(entry);
2222 
2223 	return true;
2224 }
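/* Illustrative RX sketch (not part of the driver, compiled out): on a
 * hypothetical receive path, zone_restore_id is assumed to have already
 * been recovered from the CQE metadata elsewhere; when the CT state
 * cannot be restored, the skb is expected to be dropped.
 */
#if 0
	if (!mlx5e_tc_ct_restore_flow(ct_priv, skb, zone_restore_id)) {
		dev_kfree_skb_any(skb);
		return;
	}
#endif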
2225