// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/openvswitch/actions.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
 *
 * Copyright (c) 2007-2017 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/skbuff.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>

#include <net/dst.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/mpls.h>
#include <net/sctp/checksum.h>

#include "datapath.h"
#include "flow.h"
#include "conntrack.h"
#include "vport.h"
#include "flow_netlink.h"

struct deferred_action {
	struct sk_buff *skb;
	const struct nlattr *actions;
	int actions_len;

	/* Store pkt_key clone when creating deferred action. */
	struct sw_flow_key pkt_key;
};

#define MAX_L2_LEN	(VLAN_ETH_HLEN + 3 * MPLS_HLEN)
struct ovs_frag_data {
	unsigned long dst;
	struct vport *vport;
	struct ovs_skb_cb cb;
	__be16 inner_protocol;
	u16 network_offset;	/* valid only for MPLS */
	u16 vlan_tci;
	__be16 vlan_proto;
	unsigned int l2_len;
	u8 mac_proto;
	u8 l2_data[MAX_L2_LEN];
};

static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);

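/* Action execution may nest (recirculation, sample, clone).  Rather than
 * recurse without bound, levels past OVS_DEFERRED_ACTION_THRESHOLD queue
 * their work on a small per-CPU FIFO that the outermost caller drains.
 * The FIFO is linear, not circular: it is re-initialized once drained,
 * so 'head' only grows between resets.
 */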
#define DEFERRED_ACTION_FIFO_SIZE 10
#define OVS_RECURSION_LIMIT 5
#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
struct action_fifo {
	int head;
	int tail;
	/* Deferred action fifo queue storage. */
	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
};

struct action_flow_keys {
	struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
};

static struct action_fifo __percpu *action_fifos;
static struct action_flow_keys __percpu *flow_keys;
static DEFINE_PER_CPU(int, exec_actions_level);

/* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
 * space. Return NULL if out of key spaces.
 */
static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
{
	struct action_flow_keys *keys = this_cpu_ptr(flow_keys);
	int level = this_cpu_read(exec_actions_level);
	struct sw_flow_key *key = NULL;

	if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
		key = &keys->key[level - 1];
		*key = *key_;
	}

	return key;
}

static void action_fifo_init(struct action_fifo *fifo)
{
	fifo->head = 0;
	fifo->tail = 0;
}

static bool action_fifo_is_empty(const struct action_fifo *fifo)
{
	return (fifo->head == fifo->tail);
}

static struct deferred_action *action_fifo_get(struct action_fifo *fifo)
{
	if (action_fifo_is_empty(fifo))
		return NULL;

	return &fifo->fifo[fifo->tail++];
}

static struct deferred_action *action_fifo_put(struct action_fifo *fifo)
{
	if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
		return NULL;

	return &fifo->fifo[fifo->head++];
}

/* Return the queued entry, or NULL if the fifo is full. */
static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
				    const struct sw_flow_key *key,
				    const struct nlattr *actions,
				    const int actions_len)
{
	struct action_fifo *fifo;
	struct deferred_action *da;

	fifo = this_cpu_ptr(action_fifos);
	da = action_fifo_put(fifo);
	if (da) {
		da->skb = skb;
		da->actions = actions;
		da->actions_len = actions_len;
		da->pkt_key = *key;
	}

	return da;
}

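/* Header-rewriting actions mark the cached flow key stale; it must be
 * re-extracted from the packet before any later flow lookup, e.g. on
 * recirculation.
 */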
static void invalidate_flow_key(struct sw_flow_key *key)
{
	key->mac_proto |= SW_FLOW_KEY_INVALID;
}

static bool is_flow_key_valid(const struct sw_flow_key *key)
{
	return !(key->mac_proto & SW_FLOW_KEY_INVALID);
}

static int clone_execute(struct datapath *dp, struct sk_buff *skb,
			 struct sw_flow_key *key,
			 u32 recirc_id,
			 const struct nlattr *actions, int len,
			 bool last, bool clone_flow_key);

static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
			      struct sw_flow_key *key,
			      const struct nlattr *attr, int len);

static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		     __be32 mpls_lse, __be16 mpls_ethertype, __u16 mac_len)
{
	int err;

	err = skb_mpls_push(skb, mpls_lse, mpls_ethertype, mac_len, !!mac_len);
	if (err)
		return err;

	if (!mac_len)
		key->mac_proto = MAC_PROTO_NONE;

	invalidate_flow_key(key);
	return 0;
}

static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
		    const __be16 ethertype)
{
	int err;

	err = skb_mpls_pop(skb, ethertype, skb->mac_len,
			   ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET);
	if (err)
		return err;

	if (ethertype == htons(ETH_P_TEB))
		key->mac_proto = MAC_PROTO_ETHERNET;

	invalidate_flow_key(key);
	return 0;
}

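/* Masked set actions use OVS_MASKED(old, new, mask), which yields
 * (new & mask) | (old & ~mask): only the bits selected by the mask are
 * rewritten.  Here that allows, e.g., changing the MPLS label while
 * leaving the TC, bottom-of-stack and TTL bits of the LSE untouched.
 */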
static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const __be32 *mpls_lse, const __be32 *mask)
{
	struct mpls_shim_hdr *stack;
	__be32 lse;
	int err;

	if (!pskb_may_pull(skb, skb_network_offset(skb) + MPLS_HLEN))
		return -ENOMEM;

	stack = mpls_hdr(skb);
	lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask);
	err = skb_mpls_update_lse(skb, lse);
	if (err)
		return err;

	flow_key->mpls.lse[0] = lse;
	return 0;
}

static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	err = skb_vlan_pop(skb);
	if (skb_vlan_tag_present(skb)) {
		invalidate_flow_key(key);
	} else {
		key->eth.vlan.tci = 0;
		key->eth.vlan.tpid = 0;
	}
	return err;
}

static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
		     const struct ovs_action_push_vlan *vlan)
{
	if (skb_vlan_tag_present(skb)) {
		invalidate_flow_key(key);
	} else {
		key->eth.vlan.tci = vlan->vlan_tci;
		key->eth.vlan.tpid = vlan->vlan_tpid;
	}
	return skb_vlan_push(skb, vlan->vlan_tpid,
			     ntohs(vlan->vlan_tci) & ~VLAN_CFI_MASK);
}

/* 'src' is already properly masked. */
static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
{
	u16 *dst = (u16 *)dst_;
	const u16 *src = (const u16 *)src_;
	const u16 *mask = (const u16 *)mask_;

	OVS_SET_MASKED(dst[0], src[0], mask[0]);
	OVS_SET_MASKED(dst[1], src[1], mask[1]);
	OVS_SET_MASKED(dst[2], src[2], mask[2]);
}

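/* MAC rewrites must keep a CHECKSUM_COMPLETE skb->csum consistent:
 * skb_postpull_rcsum() subtracts the old address bytes before the
 * rewrite and skb_postpush_rcsum() adds the new ones back afterwards.
 */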
static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
			const struct ovs_key_ethernet *key,
			const struct ovs_key_ethernet *mask)
{
	int err;

	err = skb_ensure_writable(skb, ETH_HLEN);
	if (unlikely(err))
		return err;

	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
			       mask->eth_src);
	ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
			       mask->eth_dst);

	skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);

	ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
	ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
	return 0;
}

/* pop_eth does not support VLAN packets as this action is never called
 * for them.
 */
static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	err = skb_eth_pop(skb);
	if (err)
		return err;

	/* safe right before invalidate_flow_key */
	key->mac_proto = MAC_PROTO_NONE;
	invalidate_flow_key(key);
	return 0;
}

static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
		    const struct ovs_action_push_eth *ethh)
{
	int err;

	err = skb_eth_push(skb, ethh->addresses.eth_dst,
			   ethh->addresses.eth_src);
	if (err)
		return err;

	/* safe right before invalidate_flow_key */
	key->mac_proto = MAC_PROTO_ETHERNET;
	invalidate_flow_key(key);
	return 0;
}

static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
		    const struct nshhdr *nh)
{
	int err;

	err = nsh_push(skb, nh);
	if (err)
		return err;

	/* safe right before invalidate_flow_key */
	key->mac_proto = MAC_PROTO_NONE;
	invalidate_flow_key(key);
	return 0;
}

static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key)
{
	int err;

	err = nsh_pop(skb);
	if (err)
		return err;

	/* safe right before invalidate_flow_key */
	if (skb->protocol == htons(ETH_P_TEB))
		key->mac_proto = MAC_PROTO_ETHERNET;
	else
		key->mac_proto = MAC_PROTO_NONE;
	invalidate_flow_key(key);
	return 0;
}

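/* Rewriting an IPv4 address also changes the TCP/UDP pseudo-header
 * checksum.  Non-first fragments carry no L4 header, so they are left
 * alone.  A UDP checksum of zero means "not used", so it is only
 * updated when present, and a recomputed value of zero is stored as
 * CSUM_MANGLED_0 (0xffff) to keep it distinguishable from "none".
 */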
static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
				  __be32 addr, __be32 new_addr)
{
	int transport_len = skb->len - skb_transport_offset(skb);

	if (nh->frag_off & htons(IP_OFFSET))
		return;

	if (nh->protocol == IPPROTO_TCP) {
		if (likely(transport_len >= sizeof(struct tcphdr)))
			inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
						 addr, new_addr, true);
	} else if (nh->protocol == IPPROTO_UDP) {
		if (likely(transport_len >= sizeof(struct udphdr))) {
			struct udphdr *uh = udp_hdr(skb);

			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
				inet_proto_csum_replace4(&uh->check, skb,
							 addr, new_addr, true);
				if (!uh->check)
					uh->check = CSUM_MANGLED_0;
			}
		}
	}
}

static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
			__be32 *addr, __be32 new_addr)
{
	update_ip_l4_checksum(skb, nh, *addr, new_addr);
	csum_replace4(&nh->check, *addr, new_addr);
	skb_clear_hash(skb);
	ovs_ct_clear(skb, NULL);
	*addr = new_addr;
}

static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
				 __be32 addr[4], const __be32 new_addr[4])
{
	int transport_len = skb->len - skb_transport_offset(skb);

	if (l4_proto == NEXTHDR_TCP) {
		if (likely(transport_len >= sizeof(struct tcphdr)))
			inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
						  addr, new_addr, true);
	} else if (l4_proto == NEXTHDR_UDP) {
		if (likely(transport_len >= sizeof(struct udphdr))) {
			struct udphdr *uh = udp_hdr(skb);

			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
				inet_proto_csum_replace16(&uh->check, skb,
							  addr, new_addr, true);
				if (!uh->check)
					uh->check = CSUM_MANGLED_0;
			}
		}
	} else if (l4_proto == NEXTHDR_ICMP) {
		if (likely(transport_len >= sizeof(struct icmp6hdr)))
			inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
						  skb, addr, new_addr, true);
	}
}

static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
			   const __be32 mask[4], __be32 masked[4])
{
	masked[0] = OVS_MASKED(old[0], addr[0], mask[0]);
	masked[1] = OVS_MASKED(old[1], addr[1], mask[1]);
	masked[2] = OVS_MASKED(old[2], addr[2], mask[2]);
	masked[3] = OVS_MASKED(old[3], addr[3], mask[3]);
}

static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
			  __be32 addr[4], const __be32 new_addr[4],
			  bool recalculate_csum)
{
	if (recalculate_csum)
		update_ipv6_checksum(skb, l4_proto, addr, new_addr);

	skb_clear_hash(skb);
	ovs_ct_clear(skb, NULL);
	memcpy(addr, new_addr, sizeof(__be32[4]));
}

static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask)
{
	u8 old_ipv6_tclass = ipv6_get_dsfield(nh);

	ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask);

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		csum_replace(&skb->csum, (__force __wsum)(old_ipv6_tclass << 12),
			     (__force __wsum)(ipv6_tclass << 12));

	ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
}

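/* The 20-bit IPv6 flow label is spread big-endian across flow_lbl[0..2];
 * the top nibble of flow_lbl[0] holds the low bits of the traffic class,
 * which the caller's mask never covers (see the comment below).
 */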
static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask)
{
	u32 ofl;

	ofl = nh->flow_lbl[0] << 16 | nh->flow_lbl[1] << 8 | nh->flow_lbl[2];
	fl = OVS_MASKED(ofl, fl, mask);

	/* Bits 21-24 are always unmasked, so this retains their values. */
	nh->flow_lbl[0] = (u8)(fl >> 16);
	nh->flow_lbl[1] = (u8)(fl >> 8);
	nh->flow_lbl[2] = (u8)fl;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		csum_replace(&skb->csum, (__force __wsum)htonl(ofl), (__force __wsum)htonl(fl));
}

static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask)
{
	new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask);

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		csum_replace(&skb->csum, (__force __wsum)(nh->hop_limit << 8),
			     (__force __wsum)(new_ttl << 8));
	nh->hop_limit = new_ttl;
}

static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
		       u8 mask)
{
	new_ttl = OVS_MASKED(nh->ttl, new_ttl, mask);

	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
	nh->ttl = new_ttl;
}

static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv4 *key,
		    const struct ovs_key_ipv4 *mask)
{
	struct iphdr *nh;
	__be32 new_addr;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct iphdr));
	if (unlikely(err))
		return err;

	nh = ip_hdr(skb);

	/* Setting an IP address is typically only a side effect of
	 * matching on it in the current userspace implementation, so it
	 * makes sense to check if the value actually changed.
	 */
	if (mask->ipv4_src) {
		new_addr = OVS_MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);

		if (unlikely(new_addr != nh->saddr)) {
			set_ip_addr(skb, nh, &nh->saddr, new_addr);
			flow_key->ipv4.addr.src = new_addr;
		}
	}
	if (mask->ipv4_dst) {
		new_addr = OVS_MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);

		if (unlikely(new_addr != nh->daddr)) {
			set_ip_addr(skb, nh, &nh->daddr, new_addr);
			flow_key->ipv4.addr.dst = new_addr;
		}
	}
	if (mask->ipv4_tos) {
		ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
		flow_key->ip.tos = nh->tos;
	}
	if (mask->ipv4_ttl) {
		set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
		flow_key->ip.ttl = nh->ttl;
	}

	return 0;
}

static bool is_ipv6_mask_nonzero(const __be32 addr[4])
{
	return !!(addr[0] | addr[1] | addr[2] | addr[3]);
}

static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_ipv6 *key,
		    const struct ovs_key_ipv6 *mask)
{
	struct ipv6hdr *nh;
	int err;

	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				  sizeof(struct ipv6hdr));
	if (unlikely(err))
		return err;

	nh = ipv6_hdr(skb);

	/* Setting an IP address is typically only a side effect of
	 * matching on it in the current userspace implementation, so it
	 * makes sense to check if the value actually changed.
	 */
	if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
		__be32 *saddr = (__be32 *)&nh->saddr;
		__be32 masked[4];

		mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);

		if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
			set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked,
				      true);
			memcpy(&flow_key->ipv6.addr.src, masked,
			       sizeof(flow_key->ipv6.addr.src));
		}
	}
	if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
		unsigned int offset = 0;
		int flags = IP6_FH_F_SKIP_RH;
		bool recalc_csum = true;
		__be32 *daddr = (__be32 *)&nh->daddr;
		__be32 masked[4];

		mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);

		if (unlikely(memcmp(daddr, masked, sizeof(masked)))) {
			if (ipv6_ext_hdr(nh->nexthdr))
				recalc_csum = (ipv6_find_hdr(skb, &offset,
							     NEXTHDR_ROUTING,
							     NULL, &flags)
					       != NEXTHDR_ROUTING);

			set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked,
				      recalc_csum);
			memcpy(&flow_key->ipv6.addr.dst, masked,
			       sizeof(flow_key->ipv6.addr.dst));
		}
	}
	if (mask->ipv6_tclass) {
		set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass);
		flow_key->ip.tos = ipv6_get_dsfield(nh);
	}
	if (mask->ipv6_label) {
		set_ipv6_fl(skb, nh, ntohl(key->ipv6_label),
			    ntohl(mask->ipv6_label));
		flow_key->ipv6.label =
		    *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
	}
	if (mask->ipv6_hlimit) {
		set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit);
		flow_key->ip.ttl = nh->hop_limit;
	}
	return 0;
}

static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct nlattr *a)
{
	struct nshhdr *nh;
	size_t length;
	int err;
	u8 flags;
	u8 ttl;
	int i;

	struct ovs_key_nsh key;
	struct ovs_key_nsh mask;

	err = nsh_key_from_nlattr(a, &key, &mask);
	if (err)
		return err;

	/* Make sure the NSH base header is there */
	if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN))
		return -ENOMEM;

	nh = nsh_hdr(skb);
	length = nsh_hdr_len(nh);

	/* Make sure the whole NSH header is there */
	err = skb_ensure_writable(skb, skb_network_offset(skb) +
				       length);
	if (unlikely(err))
		return err;

	nh = nsh_hdr(skb);
	skb_postpull_rcsum(skb, nh, length);
	flags = nsh_get_flags(nh);
	flags = OVS_MASKED(flags, key.base.flags, mask.base.flags);
	flow_key->nsh.base.flags = flags;
	ttl = nsh_get_ttl(nh);
	ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl);
	flow_key->nsh.base.ttl = ttl;
	nsh_set_flags_and_ttl(nh, flags, ttl);
	nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr,
				  mask.base.path_hdr);
	flow_key->nsh.base.path_hdr = nh->path_hdr;
	switch (nh->mdtype) {
	case NSH_M_TYPE1:
		for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) {
			nh->md1.context[i] =
			    OVS_MASKED(nh->md1.context[i], key.context[i],
				       mask.context[i]);
		}
		memcpy(flow_key->nsh.context, nh->md1.context,
		       sizeof(nh->md1.context));
		break;
	case NSH_M_TYPE2:
		memset(flow_key->nsh.context, 0,
		       sizeof(flow_key->nsh.context));
		break;
	default:
		return -EINVAL;
	}
	skb_postpush_rcsum(skb, nh, length);
	return 0;
}

/* Must follow skb_ensure_writable() since that can move the skb data. */
static void set_tp_port(struct sk_buff *skb, __be16 *port,
			__be16 new_port, __sum16 *check)
{
	ovs_ct_clear(skb, NULL);
	inet_proto_csum_replace2(check, skb, *port, new_port, false);
	*port = new_port;
}

static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_udp *key,
		   const struct ovs_key_udp *mask)
{
	struct udphdr *uh;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct udphdr));
	if (unlikely(err))
		return err;

	uh = udp_hdr(skb);
	/* Either of the masks is non-zero, so do not bother checking them. */
	src = OVS_MASKED(uh->source, key->udp_src, mask->udp_src);
	dst = OVS_MASKED(uh->dest, key->udp_dst, mask->udp_dst);

	if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
		if (likely(src != uh->source)) {
			set_tp_port(skb, &uh->source, src, &uh->check);
			flow_key->tp.src = src;
		}
		if (likely(dst != uh->dest)) {
			set_tp_port(skb, &uh->dest, dst, &uh->check);
			flow_key->tp.dst = dst;
		}

		if (unlikely(!uh->check))
			uh->check = CSUM_MANGLED_0;
	} else {
		uh->source = src;
		uh->dest = dst;
		flow_key->tp.src = src;
		flow_key->tp.dst = dst;
		ovs_ct_clear(skb, NULL);
	}

	skb_clear_hash(skb);

	return 0;
}

static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		   const struct ovs_key_tcp *key,
		   const struct ovs_key_tcp *mask)
{
	struct tcphdr *th;
	__be16 src, dst;
	int err;

	err = skb_ensure_writable(skb, skb_transport_offset(skb) +
				  sizeof(struct tcphdr));
	if (unlikely(err))
		return err;

	th = tcp_hdr(skb);
	src = OVS_MASKED(th->source, key->tcp_src, mask->tcp_src);
	if (likely(src != th->source)) {
		set_tp_port(skb, &th->source, src, &th->check);
		flow_key->tp.src = src;
	}
	dst = OVS_MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
	if (likely(dst != th->dest)) {
		set_tp_port(skb, &th->dest, dst, &th->check);
		flow_key->tp.dst = dst;
	}
	skb_clear_hash(skb);

	return 0;
}

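/* SCTP uses CRC32c, which cannot be updated incrementally the way the
 * ones'-complement Internet checksum can, so it is recomputed over the
 * rewritten packet.  Any pre-existing error (old_csum ^ old_correct_csum)
 * is XORed back in so a packet that arrived corrupt stays corrupt.
 */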
static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
		    const struct ovs_key_sctp *key,
		    const struct ovs_key_sctp *mask)
{
	unsigned int sctphoff = skb_transport_offset(skb);
	struct sctphdr *sh;
	__le32 old_correct_csum, new_csum, old_csum;
	int err;

	err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
	if (unlikely(err))
		return err;

	sh = sctp_hdr(skb);
	old_csum = sh->checksum;
	old_correct_csum = sctp_compute_cksum(skb, sctphoff);

	sh->source = OVS_MASKED(sh->source, key->sctp_src, mask->sctp_src);
	sh->dest = OVS_MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);

	new_csum = sctp_compute_cksum(skb, sctphoff);

	/* Carry any checksum errors through. */
	sh->checksum = old_csum ^ old_correct_csum ^ new_csum;

	skb_clear_hash(skb);
	ovs_ct_clear(skb, NULL);

	flow_key->tp.src = sh->source;
	flow_key->tp.dst = sh->dest;

	return 0;
}

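/* Output callback handed to the IP fragmentation helpers: restore the
 * OVS metadata and L2 header saved in the per-CPU ovs_frag_data by
 * prepare_frag(), then transmit the fragment on the saved vport.
 */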
static int ovs_vport_output(struct net *net, struct sock *sk,
			    struct sk_buff *skb)
{
	struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
	struct vport *vport = data->vport;

	if (skb_cow_head(skb, data->l2_len) < 0) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	__skb_dst_copy(skb, data->dst);
	*OVS_CB(skb) = data->cb;
	skb->inner_protocol = data->inner_protocol;
	if (data->vlan_tci & VLAN_CFI_MASK)
		__vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci & ~VLAN_CFI_MASK);
	else
		__vlan_hwaccel_clear_tag(skb);

	/* Reconstruct the MAC header. */
	skb_push(skb, data->l2_len);
	memcpy(skb->data, &data->l2_data, data->l2_len);
	skb_postpush_rcsum(skb, skb->data, data->l2_len);
	skb_reset_mac_header(skb);

	if (eth_p_mpls(skb->protocol)) {
		skb->inner_network_header = skb->network_header;
		skb_set_network_header(skb, data->network_offset);
		skb_reset_mac_len(skb);
	}

	ovs_vport_send(vport, skb, data->mac_proto);
	return 0;
}

static unsigned int
ovs_dst_get_mtu(const struct dst_entry *dst)
{
	return dst->dev->mtu;
}

static struct dst_ops ovs_dst_ops = {
	.family = AF_UNSPEC,
	.mtu = ovs_dst_get_mtu,
};

/* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is
 * ovs_vport_output(), which is called once per fragmented packet.
 */
static void prepare_frag(struct vport *vport, struct sk_buff *skb,
			 u16 orig_network_offset, u8 mac_proto)
{
	unsigned int hlen = skb_network_offset(skb);
	struct ovs_frag_data *data;

	data = this_cpu_ptr(&ovs_frag_data_storage);
	data->dst = skb->_skb_refdst;
	data->vport = vport;
	data->cb = *OVS_CB(skb);
	data->inner_protocol = skb->inner_protocol;
	data->network_offset = orig_network_offset;
	if (skb_vlan_tag_present(skb))
		data->vlan_tci = skb_vlan_tag_get(skb) | VLAN_CFI_MASK;
	else
		data->vlan_tci = 0;
	data->vlan_proto = skb->vlan_proto;
	data->mac_proto = mac_proto;
	data->l2_len = hlen;
	memcpy(&data->l2_data, skb->data, hlen);

	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
	skb_pull(skb, hlen);
}

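/* Fragmentation reuses the stack's ip_do_fragment()/ipv6_fragment()
 * helpers, which expect a dst on the skb.  A throwaway on-stack
 * rtable/rt6_info (DST_NOCOUNT, attached via skb_dst_set_noref())
 * stands in for it, and the original dst reference is dropped after.
 */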
static void ovs_fragment(struct net *net, struct vport *vport,
			 struct sk_buff *skb, u16 mru,
			 struct sw_flow_key *key)
{
	u16 orig_network_offset = 0;

	if (eth_p_mpls(skb->protocol)) {
		orig_network_offset = skb_network_offset(skb);
		skb->network_header = skb->inner_network_header;
	}

	if (skb_network_offset(skb) > MAX_L2_LEN) {
		OVS_NLERR(1, "L2 header too long to fragment");
		goto err;
	}

	if (key->eth.type == htons(ETH_P_IP)) {
		struct rtable ovs_rt = { 0 };
		unsigned long orig_dst;

		prepare_frag(vport, skb, orig_network_offset,
			     ovs_key_mac_proto(key));
		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
			 DST_OBSOLETE_NONE, DST_NOCOUNT);
		ovs_rt.dst.dev = vport->dev;

		orig_dst = skb->_skb_refdst;
		skb_dst_set_noref(skb, &ovs_rt.dst);
		IPCB(skb)->frag_max_size = mru;

		ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
		refdst_drop(orig_dst);
	} else if (key->eth.type == htons(ETH_P_IPV6)) {
		unsigned long orig_dst;
		struct rt6_info ovs_rt;

		prepare_frag(vport, skb, orig_network_offset,
			     ovs_key_mac_proto(key));
		memset(&ovs_rt, 0, sizeof(ovs_rt));
		dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
			 DST_OBSOLETE_NONE, DST_NOCOUNT);
		ovs_rt.dst.dev = vport->dev;

		orig_dst = skb->_skb_refdst;
		skb_dst_set_noref(skb, &ovs_rt.dst);
		IP6CB(skb)->frag_max_size = mru;

		ipv6_stub->ipv6_fragment(net, skb->sk, skb, ovs_vport_output);
		refdst_drop(orig_dst);
	} else {
		WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
			  ovs_vport_name(vport), ntohs(key->eth.type), mru,
			  vport->dev->mtu);
		goto err;
	}

	return;
err:
	kfree_skb(skb);
}

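/* Transmit on a vport: trim any bytes requested by a preceding truncate
 * action (cutlen), then send directly when the packet fits within the
 * MRU recorded for it (OVS_CB(skb)->mru), fragment when it does not but
 * the MRU fits the device MTU, and drop otherwise.
 */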
static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
		      struct sw_flow_key *key)
{
	struct vport *vport = ovs_vport_rcu(dp, out_port);

	if (likely(vport)) {
		u16 mru = OVS_CB(skb)->mru;
		u32 cutlen = OVS_CB(skb)->cutlen;

		if (unlikely(cutlen > 0)) {
			if (skb->len - cutlen > ovs_mac_header_len(key))
				pskb_trim(skb, skb->len - cutlen);
			else
				pskb_trim(skb, ovs_mac_header_len(key));
		}

		if (likely(!mru ||
		           (skb->len <= mru + vport->dev->hard_header_len))) {
			ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
		} else if (mru <= vport->dev->mtu) {
			struct net *net = read_pnet(&dp->net);

			ovs_fragment(net, vport, skb, mru, key);
		} else {
			kfree_skb(skb);
		}
	} else {
		kfree_skb(skb);
	}
}

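/* Send the packet to userspace as an OVS_PACKET_CMD_ACTION upcall,
 * attaching optional userdata, egress tunnel metadata and the action
 * list according to the OVS_USERSPACE_ATTR_* sub-attributes.
 */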
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
			    struct sw_flow_key *key, const struct nlattr *attr,
			    const struct nlattr *actions, int actions_len,
			    uint32_t cutlen)
{
	struct dp_upcall_info upcall;
	const struct nlattr *a;
	int rem;

	memset(&upcall, 0, sizeof(upcall));
	upcall.cmd = OVS_PACKET_CMD_ACTION;
	upcall.mru = OVS_CB(skb)->mru;

	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
	     a = nla_next(a, &rem)) {
		switch (nla_type(a)) {
		case OVS_USERSPACE_ATTR_USERDATA:
			upcall.userdata = a;
			break;

		case OVS_USERSPACE_ATTR_PID:
			upcall.portid = nla_get_u32(a);
			break;

		case OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: {
			/* Get out tunnel info. */
			struct vport *vport;

			vport = ovs_vport_rcu(dp, nla_get_u32(a));
			if (vport) {
				int err;

				err = dev_fill_metadata_dst(vport->dev, skb);
				if (!err)
					upcall.egress_tun_info = skb_tunnel_info(skb);
			}

			break;
		}

		case OVS_USERSPACE_ATTR_ACTIONS: {
			/* Include actions. */
			upcall.actions = actions;
			upcall.actions_len = actions_len;
			break;
		}

		} /* End of switch. */
	}

	return ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
}

static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb,
				     struct sw_flow_key *key,
				     const struct nlattr *attr, bool last)
{
	/* The first attribute is always 'OVS_DEC_TTL_ATTR_ACTION'. */
	struct nlattr *actions = nla_data(attr);

	if (nla_len(actions))
		return clone_execute(dp, skb, key, 0, nla_data(actions),
				     nla_len(actions), last, false);

	consume_skb(skb);
	return 0;
}

/* When 'last' is true, sample() should always consume the 'skb'.
 * Otherwise, sample() should keep 'skb' intact regardless what
 * actions are executed within sample().
 */
static int sample(struct datapath *dp, struct sk_buff *skb,
		  struct sw_flow_key *key, const struct nlattr *attr,
		  bool last)
{
	struct nlattr *actions;
	struct nlattr *sample_arg;
	int rem = nla_len(attr);
	const struct sample_arg *arg;
	bool clone_flow_key;

	/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
	sample_arg = nla_data(attr);
	arg = nla_data(sample_arg);
	actions = nla_next(sample_arg, &rem);

	if ((arg->probability != U32_MAX) &&
	    (!arg->probability || prandom_u32() > arg->probability)) {
		if (last)
			consume_skb(skb);
		return 0;
	}

	clone_flow_key = !arg->exec;
	return clone_execute(dp, skb, key, 0, actions, rem, last,
			     clone_flow_key);
}

/* When 'last' is true, clone() should always consume the 'skb'.
 * Otherwise, clone() should keep 'skb' intact regardless what
 * actions are executed within clone().
 */
static int clone(struct datapath *dp, struct sk_buff *skb,
		 struct sw_flow_key *key, const struct nlattr *attr,
		 bool last)
{
	struct nlattr *actions;
	struct nlattr *clone_arg;
	int rem = nla_len(attr);
	bool dont_clone_flow_key;

	/* The first action is always 'OVS_CLONE_ATTR_EXEC'. */
	clone_arg = nla_data(attr);
	dont_clone_flow_key = nla_get_u32(clone_arg);
	actions = nla_next(clone_arg, &rem);

	return clone_execute(dp, skb, key, 0, actions, rem, last,
			     !dont_clone_flow_key);
}

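/* Compute the OVS flow hash for the OVS_ACTION_ATTR_HASH action: the
 * skb's L4 flow hash mixed with the caller-supplied basis through
 * jhash_1word().  Zero is reserved to mean "no hash computed", so a
 * zero result is replaced with 0x1.
 */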
static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
			 const struct nlattr *attr)
{
	struct ovs_action_hash *hash_act = nla_data(attr);
	u32 hash = 0;

	/* OVS_HASH_ALG_L4 is the only possible hash algorithm. */
	hash = skb_get_hash(skb);
	hash = jhash_1word(hash, hash_act->hash_basis);
	if (!hash)
		hash = 0x1;

	key->ovs_flow_hash = hash;
}

static int execute_set_action(struct sk_buff *skb,
			      struct sw_flow_key *flow_key,
			      const struct nlattr *a)
{
	/* Only tunnel set execution is supported without a mask. */
	if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
		struct ovs_tunnel_info *tun = nla_data(a);

		skb_dst_drop(skb);
		dst_hold((struct dst_entry *)tun->tun_dst);
		skb_dst_set(skb, (struct dst_entry *)tun->tun_dst);
		return 0;
	}

	return -EINVAL;
}

/* Mask is at the midpoint of the data. */
#define get_mask(a, type) ((const type)nla_data(a) + 1)
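/* I.e., a masked set attribute carries the value immediately followed
 * by an equally sized mask: for OVS_KEY_ATTR_IPV4 the payload is a
 * struct ovs_key_ipv4 value and then a struct ovs_key_ipv4 mask, so
 * get_mask() is simply "one struct past nla_data()".
 */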
1096*4882a593Smuzhiyun 
execute_masked_set_action(struct sk_buff * skb,struct sw_flow_key * flow_key,const struct nlattr * a)1097*4882a593Smuzhiyun static int execute_masked_set_action(struct sk_buff *skb,
1098*4882a593Smuzhiyun 				     struct sw_flow_key *flow_key,
1099*4882a593Smuzhiyun 				     const struct nlattr *a)
1100*4882a593Smuzhiyun {
1101*4882a593Smuzhiyun 	int err = 0;
1102*4882a593Smuzhiyun 
1103*4882a593Smuzhiyun 	switch (nla_type(a)) {
1104*4882a593Smuzhiyun 	case OVS_KEY_ATTR_PRIORITY:
1105*4882a593Smuzhiyun 		OVS_SET_MASKED(skb->priority, nla_get_u32(a),
1106*4882a593Smuzhiyun 			       *get_mask(a, u32 *));
1107*4882a593Smuzhiyun 		flow_key->phy.priority = skb->priority;
1108*4882a593Smuzhiyun 		break;
1109*4882a593Smuzhiyun 
1110*4882a593Smuzhiyun 	case OVS_KEY_ATTR_SKB_MARK:
1111*4882a593Smuzhiyun 		OVS_SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
1112*4882a593Smuzhiyun 		flow_key->phy.skb_mark = skb->mark;
1113*4882a593Smuzhiyun 		break;
1114*4882a593Smuzhiyun 
1115*4882a593Smuzhiyun 	case OVS_KEY_ATTR_TUNNEL_INFO:
1116*4882a593Smuzhiyun 		/* Masked data not supported for tunnel. */
1117*4882a593Smuzhiyun 		err = -EINVAL;
1118*4882a593Smuzhiyun 		break;
1119*4882a593Smuzhiyun 
1120*4882a593Smuzhiyun 	case OVS_KEY_ATTR_ETHERNET:
1121*4882a593Smuzhiyun 		err = set_eth_addr(skb, flow_key, nla_data(a),
1122*4882a593Smuzhiyun 				   get_mask(a, struct ovs_key_ethernet *));
1123*4882a593Smuzhiyun 		break;
1124*4882a593Smuzhiyun 
1125*4882a593Smuzhiyun 	case OVS_KEY_ATTR_NSH:
1126*4882a593Smuzhiyun 		err = set_nsh(skb, flow_key, a);
1127*4882a593Smuzhiyun 		break;
1128*4882a593Smuzhiyun 
1129*4882a593Smuzhiyun 	case OVS_KEY_ATTR_IPV4:
1130*4882a593Smuzhiyun 		err = set_ipv4(skb, flow_key, nla_data(a),
1131*4882a593Smuzhiyun 			       get_mask(a, struct ovs_key_ipv4 *));
1132*4882a593Smuzhiyun 		break;
1133*4882a593Smuzhiyun 
1134*4882a593Smuzhiyun 	case OVS_KEY_ATTR_IPV6:
1135*4882a593Smuzhiyun 		err = set_ipv6(skb, flow_key, nla_data(a),
1136*4882a593Smuzhiyun 			       get_mask(a, struct ovs_key_ipv6 *));
1137*4882a593Smuzhiyun 		break;
1138*4882a593Smuzhiyun 
1139*4882a593Smuzhiyun 	case OVS_KEY_ATTR_TCP:
1140*4882a593Smuzhiyun 		err = set_tcp(skb, flow_key, nla_data(a),
1141*4882a593Smuzhiyun 			      get_mask(a, struct ovs_key_tcp *));
1142*4882a593Smuzhiyun 		break;
1143*4882a593Smuzhiyun 
1144*4882a593Smuzhiyun 	case OVS_KEY_ATTR_UDP:
1145*4882a593Smuzhiyun 		err = set_udp(skb, flow_key, nla_data(a),
1146*4882a593Smuzhiyun 			      get_mask(a, struct ovs_key_udp *));
1147*4882a593Smuzhiyun 		break;
1148*4882a593Smuzhiyun 
1149*4882a593Smuzhiyun 	case OVS_KEY_ATTR_SCTP:
1150*4882a593Smuzhiyun 		err = set_sctp(skb, flow_key, nla_data(a),
1151*4882a593Smuzhiyun 			       get_mask(a, struct ovs_key_sctp *));
1152*4882a593Smuzhiyun 		break;
1153*4882a593Smuzhiyun 
1154*4882a593Smuzhiyun 	case OVS_KEY_ATTR_MPLS:
1155*4882a593Smuzhiyun 		err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
1156*4882a593Smuzhiyun 								    __be32 *));
1157*4882a593Smuzhiyun 		break;
1158*4882a593Smuzhiyun 
1159*4882a593Smuzhiyun 	case OVS_KEY_ATTR_CT_STATE:
1160*4882a593Smuzhiyun 	case OVS_KEY_ATTR_CT_ZONE:
1161*4882a593Smuzhiyun 	case OVS_KEY_ATTR_CT_MARK:
1162*4882a593Smuzhiyun 	case OVS_KEY_ATTR_CT_LABELS:
1163*4882a593Smuzhiyun 	case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
1164*4882a593Smuzhiyun 	case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
1165*4882a593Smuzhiyun 		err = -EINVAL;
1166*4882a593Smuzhiyun 		break;
1167*4882a593Smuzhiyun 	}
1168*4882a593Smuzhiyun 
1169*4882a593Smuzhiyun 	return err;
1170*4882a593Smuzhiyun }
1171*4882a593Smuzhiyun 
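/* Editorial sketch of the OVS_SET_MASKED() semantics used throughout
 * the switch above (the macro lives in the openvswitch flow headers;
 * userspace guarantees the new value has no bits set outside the
 * mask), conceptually:
 *
 *	old = new | (old & ~mask);
 *
 * For example old = 0xAABB, new = 0x0022, mask = 0x00FF yields
 * 0xAA22: unmasked bits are preserved, masked bits are replaced.
 */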
1172*4882a593Smuzhiyun static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
1173*4882a593Smuzhiyun 			  struct sw_flow_key *key,
1174*4882a593Smuzhiyun 			  const struct nlattr *a, bool last)
1175*4882a593Smuzhiyun {
1176*4882a593Smuzhiyun 	u32 recirc_id;
1177*4882a593Smuzhiyun 
1178*4882a593Smuzhiyun 	if (!is_flow_key_valid(key)) {
1179*4882a593Smuzhiyun 		int err;
1180*4882a593Smuzhiyun 
1181*4882a593Smuzhiyun 		err = ovs_flow_key_update(skb, key);
1182*4882a593Smuzhiyun 		if (err)
1183*4882a593Smuzhiyun 			return err;
1184*4882a593Smuzhiyun 	}
1185*4882a593Smuzhiyun 	BUG_ON(!is_flow_key_valid(key));
1186*4882a593Smuzhiyun 
1187*4882a593Smuzhiyun 	recirc_id = nla_get_u32(a);
1188*4882a593Smuzhiyun 	return clone_execute(dp, skb, key, recirc_id, NULL, 0, last, true);
1189*4882a593Smuzhiyun }
1190*4882a593Smuzhiyun 
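/* Editorial note: recirculation is implemented as clone_execute()
 * with a NULL action list; in that case clone_execute() below stores
 * the new recirc_id in the (possibly cloned) key and re-enters
 * ovs_dp_process_packet(), so the packet goes through flow-table
 * lookup again with recirc_id as part of its match.
 */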
1191*4882a593Smuzhiyun static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb,
1192*4882a593Smuzhiyun 				 struct sw_flow_key *key,
1193*4882a593Smuzhiyun 				 const struct nlattr *attr, bool last)
1194*4882a593Smuzhiyun {
1195*4882a593Smuzhiyun 	struct ovs_skb_cb *ovs_cb = OVS_CB(skb);
1196*4882a593Smuzhiyun 	const struct nlattr *actions, *cpl_arg;
1197*4882a593Smuzhiyun 	int len, max_len, rem = nla_len(attr);
1198*4882a593Smuzhiyun 	const struct check_pkt_len_arg *arg;
1199*4882a593Smuzhiyun 	bool clone_flow_key;
1200*4882a593Smuzhiyun 
1201*4882a593Smuzhiyun 	/* The first netlink attribute in 'attr' is always
1202*4882a593Smuzhiyun 	 * 'OVS_CHECK_PKT_LEN_ATTR_ARG'.
1203*4882a593Smuzhiyun 	 */
1204*4882a593Smuzhiyun 	cpl_arg = nla_data(attr);
1205*4882a593Smuzhiyun 	arg = nla_data(cpl_arg);
1206*4882a593Smuzhiyun 
1207*4882a593Smuzhiyun 	len = ovs_cb->mru ? ovs_cb->mru + skb->mac_len : skb->len;
1208*4882a593Smuzhiyun 	max_len = arg->pkt_len;
1209*4882a593Smuzhiyun 
1210*4882a593Smuzhiyun 	if ((skb_is_gso(skb) && skb_gso_validate_mac_len(skb, max_len)) ||
1211*4882a593Smuzhiyun 	    len <= max_len) {
1212*4882a593Smuzhiyun 		/* Second netlink attribute in 'attr' is always
1213*4882a593Smuzhiyun 		 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
1214*4882a593Smuzhiyun 		 */
1215*4882a593Smuzhiyun 		actions = nla_next(cpl_arg, &rem);
1216*4882a593Smuzhiyun 		clone_flow_key = !arg->exec_for_lesser_equal;
1217*4882a593Smuzhiyun 	} else {
1218*4882a593Smuzhiyun 		/* Third netlink attribute in 'attr' is always
1219*4882a593Smuzhiyun 		 * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER'.
1220*4882a593Smuzhiyun 		 */
1221*4882a593Smuzhiyun 		actions = nla_next(cpl_arg, &rem);
1222*4882a593Smuzhiyun 		actions = nla_next(actions, &rem);
1223*4882a593Smuzhiyun 		clone_flow_key = !arg->exec_for_greater;
1224*4882a593Smuzhiyun 	}
1225*4882a593Smuzhiyun 
1226*4882a593Smuzhiyun 	return clone_execute(dp, skb, key, 0, nla_data(actions),
1227*4882a593Smuzhiyun 			     nla_len(actions), last, clone_flow_key);
1228*4882a593Smuzhiyun }
1229*4882a593Smuzhiyun 
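/* Editorial sketch of the attribute order execute_check_pkt_len()
 * above relies on:
 *
 *	OVS_ACTION_ATTR_CHECK_PKT_LEN
 *	+- OVS_CHECK_PKT_LEN_ATTR_ARG                    (check_pkt_len_arg)
 *	+- OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL  (nested actions)
 *	+- OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER     (nested actions)
 *
 * The comparison uses the original wire length: mru + mac_len for
 * reassembled fragments and skb->len otherwise, while GSO packets
 * take the "less equal" branch when skb_gso_validate_mac_len()
 * confirms every resulting segment fits within max_len.
 */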
1230*4882a593Smuzhiyun static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
1231*4882a593Smuzhiyun {
1232*4882a593Smuzhiyun 	int err;
1233*4882a593Smuzhiyun 
1234*4882a593Smuzhiyun 	if (skb->protocol == htons(ETH_P_IPV6)) {
1235*4882a593Smuzhiyun 		struct ipv6hdr *nh;
1236*4882a593Smuzhiyun 
1237*4882a593Smuzhiyun 		err = skb_ensure_writable(skb, skb_network_offset(skb) +
1238*4882a593Smuzhiyun 					  sizeof(*nh));
1239*4882a593Smuzhiyun 		if (unlikely(err))
1240*4882a593Smuzhiyun 			return err;
1241*4882a593Smuzhiyun 
1242*4882a593Smuzhiyun 		nh = ipv6_hdr(skb);
1243*4882a593Smuzhiyun 
1244*4882a593Smuzhiyun 		if (nh->hop_limit <= 1)
1245*4882a593Smuzhiyun 			return -EHOSTUNREACH;
1246*4882a593Smuzhiyun 
1247*4882a593Smuzhiyun 		key->ip.ttl = --nh->hop_limit;
1248*4882a593Smuzhiyun 	} else if (skb->protocol == htons(ETH_P_IP)) {
1249*4882a593Smuzhiyun 		struct iphdr *nh;
1250*4882a593Smuzhiyun 		u8 old_ttl;
1251*4882a593Smuzhiyun 
1252*4882a593Smuzhiyun 		err = skb_ensure_writable(skb, skb_network_offset(skb) +
1253*4882a593Smuzhiyun 					  sizeof(*nh));
1254*4882a593Smuzhiyun 		if (unlikely(err))
1255*4882a593Smuzhiyun 			return err;
1256*4882a593Smuzhiyun 
1257*4882a593Smuzhiyun 		nh = ip_hdr(skb);
1258*4882a593Smuzhiyun 		if (nh->ttl <= 1)
1259*4882a593Smuzhiyun 			return -EHOSTUNREACH;
1260*4882a593Smuzhiyun 
1261*4882a593Smuzhiyun 		old_ttl = nh->ttl--;
1262*4882a593Smuzhiyun 		csum_replace2(&nh->check, htons(old_ttl << 8),
1263*4882a593Smuzhiyun 			      htons(nh->ttl << 8));
1264*4882a593Smuzhiyun 		key->ip.ttl = nh->ttl;
1265*4882a593Smuzhiyun 	}
1266*4882a593Smuzhiyun 	return 0;
1267*4882a593Smuzhiyun }
1268*4882a593Smuzhiyun 
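/* Editorial note on the IPv4 checksum fixup above: ttl shares a
 * 16-bit header word with the protocol field, with ttl in the high
 * byte, so decrementing ttl changes that word from old_ttl << 8 to
 * nh->ttl << 8. csum_replace2() folds exactly this 16-bit difference
 * into nh->check (incremental update per RFC 1624) instead of
 * recomputing the whole header checksum. IPv6 needs no fixup since it
 * carries no header checksum. Both paths refuse to forward once the
 * ttl/hop_limit would drop below 1 (-EHOSTUNREACH), which
 * do_execute_actions() turns into a dec_ttl exception.
 */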
1269*4882a593Smuzhiyun /* Execute a list of actions against 'skb'. */
1270*4882a593Smuzhiyun static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
1271*4882a593Smuzhiyun 			      struct sw_flow_key *key,
1272*4882a593Smuzhiyun 			      const struct nlattr *attr, int len)
1273*4882a593Smuzhiyun {
1274*4882a593Smuzhiyun 	const struct nlattr *a;
1275*4882a593Smuzhiyun 	int rem;
1276*4882a593Smuzhiyun 
1277*4882a593Smuzhiyun 	for (a = attr, rem = len; rem > 0;
1278*4882a593Smuzhiyun 	     a = nla_next(a, &rem)) {
1279*4882a593Smuzhiyun 		int err = 0;
1280*4882a593Smuzhiyun 
1281*4882a593Smuzhiyun 		switch (nla_type(a)) {
1282*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_OUTPUT: {
1283*4882a593Smuzhiyun 			int port = nla_get_u32(a);
1284*4882a593Smuzhiyun 			struct sk_buff *clone;
1285*4882a593Smuzhiyun 
1286*4882a593Smuzhiyun 			/* Every output action needs a separate clone
1287*4882a593Smuzhiyun 			 * of 'skb'. If the output action is the
1288*4882a593Smuzhiyun 			 * last action, cloning can be avoided.
1289*4882a593Smuzhiyun 			 */
1290*4882a593Smuzhiyun 			if (nla_is_last(a, rem)) {
1291*4882a593Smuzhiyun 				do_output(dp, skb, port, key);
1292*4882a593Smuzhiyun 				/* 'skb' has been used for output.
1293*4882a593Smuzhiyun 				 */
1294*4882a593Smuzhiyun 				return 0;
1295*4882a593Smuzhiyun 			}
1296*4882a593Smuzhiyun 
1297*4882a593Smuzhiyun 			clone = skb_clone(skb, GFP_ATOMIC);
1298*4882a593Smuzhiyun 			if (clone)
1299*4882a593Smuzhiyun 				do_output(dp, clone, port, key);
1300*4882a593Smuzhiyun 			OVS_CB(skb)->cutlen = 0;
1301*4882a593Smuzhiyun 			break;
1302*4882a593Smuzhiyun 		}
1303*4882a593Smuzhiyun 
1304*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_TRUNC: {
1305*4882a593Smuzhiyun 			struct ovs_action_trunc *trunc = nla_data(a);
1306*4882a593Smuzhiyun 
1307*4882a593Smuzhiyun 			if (skb->len > trunc->max_len)
1308*4882a593Smuzhiyun 				OVS_CB(skb)->cutlen = skb->len - trunc->max_len;
1309*4882a593Smuzhiyun 			break;
1310*4882a593Smuzhiyun 		}
1311*4882a593Smuzhiyun 
1312*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_USERSPACE:
1313*4882a593Smuzhiyun 			output_userspace(dp, skb, key, a, attr,
1314*4882a593Smuzhiyun 						     len, OVS_CB(skb)->cutlen);
1315*4882a593Smuzhiyun 			OVS_CB(skb)->cutlen = 0;
1316*4882a593Smuzhiyun 			break;
1317*4882a593Smuzhiyun 
1318*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_HASH:
1319*4882a593Smuzhiyun 			execute_hash(skb, key, a);
1320*4882a593Smuzhiyun 			break;
1321*4882a593Smuzhiyun 
1322*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_PUSH_MPLS: {
1323*4882a593Smuzhiyun 			struct ovs_action_push_mpls *mpls = nla_data(a);
1324*4882a593Smuzhiyun 
1325*4882a593Smuzhiyun 			err = push_mpls(skb, key, mpls->mpls_lse,
1326*4882a593Smuzhiyun 					mpls->mpls_ethertype, skb->mac_len);
1327*4882a593Smuzhiyun 			break;
1328*4882a593Smuzhiyun 		}
1329*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_ADD_MPLS: {
1330*4882a593Smuzhiyun 			struct ovs_action_add_mpls *mpls = nla_data(a);
1331*4882a593Smuzhiyun 			__u16 mac_len = 0;
1332*4882a593Smuzhiyun 
1333*4882a593Smuzhiyun 			if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK)
1334*4882a593Smuzhiyun 				mac_len = skb->mac_len;
1335*4882a593Smuzhiyun 
1336*4882a593Smuzhiyun 			err = push_mpls(skb, key, mpls->mpls_lse,
1337*4882a593Smuzhiyun 					mpls->mpls_ethertype, mac_len);
1338*4882a593Smuzhiyun 			break;
1339*4882a593Smuzhiyun 		}
1340*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_POP_MPLS:
1341*4882a593Smuzhiyun 			err = pop_mpls(skb, key, nla_get_be16(a));
1342*4882a593Smuzhiyun 			break;
1343*4882a593Smuzhiyun 
1344*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_PUSH_VLAN:
1345*4882a593Smuzhiyun 			err = push_vlan(skb, key, nla_data(a));
1346*4882a593Smuzhiyun 			break;
1347*4882a593Smuzhiyun 
1348*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_POP_VLAN:
1349*4882a593Smuzhiyun 			err = pop_vlan(skb, key);
1350*4882a593Smuzhiyun 			break;
1351*4882a593Smuzhiyun 
1352*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_RECIRC: {
1353*4882a593Smuzhiyun 			bool last = nla_is_last(a, rem);
1354*4882a593Smuzhiyun 
1355*4882a593Smuzhiyun 			err = execute_recirc(dp, skb, key, a, last);
1356*4882a593Smuzhiyun 			if (last) {
1357*4882a593Smuzhiyun 				/* If this is the last action, the skb has
1358*4882a593Smuzhiyun 				 * been consumed or freed.
1359*4882a593Smuzhiyun 				 * Return immediately.
1360*4882a593Smuzhiyun 				 */
1361*4882a593Smuzhiyun 				return err;
1362*4882a593Smuzhiyun 			}
1363*4882a593Smuzhiyun 			break;
1364*4882a593Smuzhiyun 		}
1365*4882a593Smuzhiyun 
1366*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_SET:
1367*4882a593Smuzhiyun 			err = execute_set_action(skb, key, nla_data(a));
1368*4882a593Smuzhiyun 			break;
1369*4882a593Smuzhiyun 
1370*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_SET_MASKED:
1371*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_SET_TO_MASKED:
1372*4882a593Smuzhiyun 			err = execute_masked_set_action(skb, key, nla_data(a));
1373*4882a593Smuzhiyun 			break;
1374*4882a593Smuzhiyun 
1375*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_SAMPLE: {
1376*4882a593Smuzhiyun 			bool last = nla_is_last(a, rem);
1377*4882a593Smuzhiyun 
1378*4882a593Smuzhiyun 			err = sample(dp, skb, key, a, last);
1379*4882a593Smuzhiyun 			if (last)
1380*4882a593Smuzhiyun 				return err;
1381*4882a593Smuzhiyun 
1382*4882a593Smuzhiyun 			break;
1383*4882a593Smuzhiyun 		}
1384*4882a593Smuzhiyun 
1385*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_CT:
1386*4882a593Smuzhiyun 			if (!is_flow_key_valid(key)) {
1387*4882a593Smuzhiyun 				err = ovs_flow_key_update(skb, key);
1388*4882a593Smuzhiyun 				if (err)
1389*4882a593Smuzhiyun 					return err;
1390*4882a593Smuzhiyun 			}
1391*4882a593Smuzhiyun 
1392*4882a593Smuzhiyun 			err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,
1393*4882a593Smuzhiyun 					     nla_data(a));
1394*4882a593Smuzhiyun 
1395*4882a593Smuzhiyun 			/* Hide stolen IP fragments from user space. */
1396*4882a593Smuzhiyun 			if (err)
1397*4882a593Smuzhiyun 				return err == -EINPROGRESS ? 0 : err;
1398*4882a593Smuzhiyun 			break;
1399*4882a593Smuzhiyun 
1400*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_CT_CLEAR:
1401*4882a593Smuzhiyun 			err = ovs_ct_clear(skb, key);
1402*4882a593Smuzhiyun 			break;
1403*4882a593Smuzhiyun 
1404*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_PUSH_ETH:
1405*4882a593Smuzhiyun 			err = push_eth(skb, key, nla_data(a));
1406*4882a593Smuzhiyun 			break;
1407*4882a593Smuzhiyun 
1408*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_POP_ETH:
1409*4882a593Smuzhiyun 			err = pop_eth(skb, key);
1410*4882a593Smuzhiyun 			break;
1411*4882a593Smuzhiyun 
1412*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_PUSH_NSH: {
1413*4882a593Smuzhiyun 			u8 buffer[NSH_HDR_MAX_LEN];
1414*4882a593Smuzhiyun 			struct nshhdr *nh = (struct nshhdr *)buffer;
1415*4882a593Smuzhiyun 
1416*4882a593Smuzhiyun 			err = nsh_hdr_from_nlattr(nla_data(a), nh,
1417*4882a593Smuzhiyun 						  NSH_HDR_MAX_LEN);
1418*4882a593Smuzhiyun 			if (unlikely(err))
1419*4882a593Smuzhiyun 				break;
1420*4882a593Smuzhiyun 			err = push_nsh(skb, key, nh);
1421*4882a593Smuzhiyun 			break;
1422*4882a593Smuzhiyun 		}
1423*4882a593Smuzhiyun 
1424*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_POP_NSH:
1425*4882a593Smuzhiyun 			err = pop_nsh(skb, key);
1426*4882a593Smuzhiyun 			break;
1427*4882a593Smuzhiyun 
1428*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_METER:
1429*4882a593Smuzhiyun 			if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) {
1430*4882a593Smuzhiyun 				consume_skb(skb);
1431*4882a593Smuzhiyun 				return 0;
1432*4882a593Smuzhiyun 			}
1433*4882a593Smuzhiyun 			break;
1434*4882a593Smuzhiyun 
1435*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_CLONE: {
1436*4882a593Smuzhiyun 			bool last = nla_is_last(a, rem);
1437*4882a593Smuzhiyun 
1438*4882a593Smuzhiyun 			err = clone(dp, skb, key, a, last);
1439*4882a593Smuzhiyun 			if (last)
1440*4882a593Smuzhiyun 				return err;
1441*4882a593Smuzhiyun 
1442*4882a593Smuzhiyun 			break;
1443*4882a593Smuzhiyun 		}
1444*4882a593Smuzhiyun 
1445*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_CHECK_PKT_LEN: {
1446*4882a593Smuzhiyun 			bool last = nla_is_last(a, rem);
1447*4882a593Smuzhiyun 
1448*4882a593Smuzhiyun 			err = execute_check_pkt_len(dp, skb, key, a, last);
1449*4882a593Smuzhiyun 			if (last)
1450*4882a593Smuzhiyun 				return err;
1451*4882a593Smuzhiyun 
1452*4882a593Smuzhiyun 			break;
1453*4882a593Smuzhiyun 		}
1454*4882a593Smuzhiyun 
1455*4882a593Smuzhiyun 		case OVS_ACTION_ATTR_DEC_TTL:
1456*4882a593Smuzhiyun 			err = execute_dec_ttl(skb, key);
1457*4882a593Smuzhiyun 			if (err == -EHOSTUNREACH) {
1458*4882a593Smuzhiyun 				err = dec_ttl_exception_handler(dp, skb, key,
1459*4882a593Smuzhiyun 								a, true);
1460*4882a593Smuzhiyun 				return err;
1461*4882a593Smuzhiyun 			}
1462*4882a593Smuzhiyun 			break;
1463*4882a593Smuzhiyun 		}
1464*4882a593Smuzhiyun 
1465*4882a593Smuzhiyun 		if (unlikely(err)) {
1466*4882a593Smuzhiyun 			kfree_skb(skb);
1467*4882a593Smuzhiyun 			return err;
1468*4882a593Smuzhiyun 		}
1469*4882a593Smuzhiyun 	}
1470*4882a593Smuzhiyun 
1471*4882a593Smuzhiyun 	consume_skb(skb);
1472*4882a593Smuzhiyun 	return 0;
1473*4882a593Smuzhiyun }
1474*4882a593Smuzhiyun 
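/* Editorial sketch: the attribute walk in do_execute_actions() is the
 * open-coded form of the netlink iterator
 *
 *	nla_for_each_attr(a, attr, len, rem) {
 *		switch (nla_type(a)) { ... }
 *	}
 *
 * kept explicit, presumably, because output/recirc/sample/clone/
 * check_pkt_len need nla_is_last(a, rem) to decide whether 'skb' may
 * be consumed in place of cloning. Note that every path out of the
 * function disposes of 'skb': handed off by a final action, freed via
 * kfree_skb() on error, or released via consume_skb() at the end.
 */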
1475*4882a593Smuzhiyun /* Execute the actions on the clone of the packet. The effect of the
1476*4882a593Smuzhiyun  * execution does not affect the original 'skb' nor the original 'key'.
1477*4882a593Smuzhiyun  *
1478*4882a593Smuzhiyun  * The execution may be deferred if the actions cannot be executed
1479*4882a593Smuzhiyun  * immediately.
1480*4882a593Smuzhiyun  */
1481*4882a593Smuzhiyun static int clone_execute(struct datapath *dp, struct sk_buff *skb,
1482*4882a593Smuzhiyun 			 struct sw_flow_key *key, u32 recirc_id,
1483*4882a593Smuzhiyun 			 const struct nlattr *actions, int len,
1484*4882a593Smuzhiyun 			 bool last, bool clone_flow_key)
1485*4882a593Smuzhiyun {
1486*4882a593Smuzhiyun 	struct deferred_action *da;
1487*4882a593Smuzhiyun 	struct sw_flow_key *clone;
1488*4882a593Smuzhiyun 
1489*4882a593Smuzhiyun 	skb = last ? skb : skb_clone(skb, GFP_ATOMIC);
1490*4882a593Smuzhiyun 	if (!skb) {
1491*4882a593Smuzhiyun 		/* Out of memory, skip this action.
1492*4882a593Smuzhiyun 		 */
1493*4882a593Smuzhiyun 		return 0;
1494*4882a593Smuzhiyun 	}
1495*4882a593Smuzhiyun 
1496*4882a593Smuzhiyun 	/* When clone_flow_key is false, the 'key' will not be changed
1497*4882a593Smuzhiyun 	 * by the actions, so the 'key' can be used directly.
1498*4882a593Smuzhiyun 	 * Otherwise, try to clone key from the next recursion level of
1499*4882a593Smuzhiyun 	 * 'flow_keys'. If clone is successful, execute the actions
1500*4882a593Smuzhiyun 	 * without deferring.
1501*4882a593Smuzhiyun 	 */
1502*4882a593Smuzhiyun 	clone = clone_flow_key ? clone_key(key) : key;
1503*4882a593Smuzhiyun 	if (clone) {
1504*4882a593Smuzhiyun 		int err = 0;
1505*4882a593Smuzhiyun 
1506*4882a593Smuzhiyun 		if (actions) { /* Sample action */
1507*4882a593Smuzhiyun 			if (clone_flow_key)
1508*4882a593Smuzhiyun 				__this_cpu_inc(exec_actions_level);
1509*4882a593Smuzhiyun 
1510*4882a593Smuzhiyun 			err = do_execute_actions(dp, skb, clone,
1511*4882a593Smuzhiyun 						 actions, len);
1512*4882a593Smuzhiyun 
1513*4882a593Smuzhiyun 			if (clone_flow_key)
1514*4882a593Smuzhiyun 				__this_cpu_dec(exec_actions_level);
1515*4882a593Smuzhiyun 		} else { /* Recirc action */
1516*4882a593Smuzhiyun 			clone->recirc_id = recirc_id;
1517*4882a593Smuzhiyun 			ovs_dp_process_packet(skb, clone);
1518*4882a593Smuzhiyun 		}
1519*4882a593Smuzhiyun 		return err;
1520*4882a593Smuzhiyun 	}
1521*4882a593Smuzhiyun 
1522*4882a593Smuzhiyun 	/* Out of 'flow_keys' space. Defer the actions. */
1523*4882a593Smuzhiyun 	da = add_deferred_actions(skb, key, actions, len);
1524*4882a593Smuzhiyun 	if (da) {
1525*4882a593Smuzhiyun 		if (!actions) { /* Recirc action */
1526*4882a593Smuzhiyun 			key = &da->pkt_key;
1527*4882a593Smuzhiyun 			key->recirc_id = recirc_id;
1528*4882a593Smuzhiyun 		}
1529*4882a593Smuzhiyun 	} else {
1530*4882a593Smuzhiyun 		/* Out of per-CPU action FIFO space. Drop the 'skb' and
1531*4882a593Smuzhiyun 		 * log an error.
1532*4882a593Smuzhiyun 		 */
1533*4882a593Smuzhiyun 		kfree_skb(skb);
1534*4882a593Smuzhiyun 
1535*4882a593Smuzhiyun 		if (net_ratelimit()) {
1536*4882a593Smuzhiyun 			if (actions) { /* Sample action */
1537*4882a593Smuzhiyun 				pr_warn("%s: deferred action limit reached, drop sample action\n",
1538*4882a593Smuzhiyun 					ovs_dp_name(dp));
1539*4882a593Smuzhiyun 			} else {  /* Recirc action */
1540*4882a593Smuzhiyun 				pr_warn("%s: deferred action limit reached, drop recirc action\n",
1541*4882a593Smuzhiyun 					ovs_dp_name(dp));
1542*4882a593Smuzhiyun 			}
1543*4882a593Smuzhiyun 		}
1544*4882a593Smuzhiyun 	}
1545*4882a593Smuzhiyun 	return 0;
1546*4882a593Smuzhiyun }
1547*4882a593Smuzhiyun 
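/* Editorial summary of the clone_execute() paths above:
 *
 *	clone_key() ok (or no key clone needed)
 *		-> execute now: do_execute_actions() for sample/clone
 *		   action lists, ovs_dp_process_packet() for
 *		   recirculation;
 *	out of per-CPU flow_keys space
 *		-> defer: queue skb + key on the per-CPU action_fifo,
 *		   drained later by the top-level ovs_execute_actions();
 *	action_fifo also full
 *		-> drop the skb and emit a rate-limited warning.
 */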
1548*4882a593Smuzhiyun static void process_deferred_actions(struct datapath *dp)
1549*4882a593Smuzhiyun {
1550*4882a593Smuzhiyun 	struct action_fifo *fifo = this_cpu_ptr(action_fifos);
1551*4882a593Smuzhiyun 
1552*4882a593Smuzhiyun 	/* Do not touch the FIFO if there are no deferred actions. */
1553*4882a593Smuzhiyun 	if (action_fifo_is_empty(fifo))
1554*4882a593Smuzhiyun 		return;
1555*4882a593Smuzhiyun 
1556*4882a593Smuzhiyun 	/* Finish executing all deferred actions. */
1557*4882a593Smuzhiyun 	do {
1558*4882a593Smuzhiyun 		struct deferred_action *da = action_fifo_get(fifo);
1559*4882a593Smuzhiyun 		struct sk_buff *skb = da->skb;
1560*4882a593Smuzhiyun 		struct sw_flow_key *key = &da->pkt_key;
1561*4882a593Smuzhiyun 		const struct nlattr *actions = da->actions;
1562*4882a593Smuzhiyun 		int actions_len = da->actions_len;
1563*4882a593Smuzhiyun 
1564*4882a593Smuzhiyun 		if (actions)
1565*4882a593Smuzhiyun 			do_execute_actions(dp, skb, key, actions, actions_len);
1566*4882a593Smuzhiyun 		else
1567*4882a593Smuzhiyun 			ovs_dp_process_packet(skb, key);
1568*4882a593Smuzhiyun 	} while (!action_fifo_is_empty(fifo));
1569*4882a593Smuzhiyun 
1570*4882a593Smuzhiyun 	/* Reset FIFO for the next packet.  */
1571*4882a593Smuzhiyun 	action_fifo_init(fifo);
1572*4882a593Smuzhiyun }
1573*4882a593Smuzhiyun 
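/* Editorial note (assumed shape of the FIFO helpers defined earlier
 * in this file): the per-CPU ring never wraps within one packet -
 * entries are appended at 'head' by add_deferred_actions() and read
 * back at 'tail' here, the queue is empty when head == tail, and
 * action_fifo_init() resets both indices once the top-level packet is
 * done. Actions deferred while draining are picked up by the same
 * do-while loop, which is why the empty check is re-evaluated after
 * every entry.
 */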
1574*4882a593Smuzhiyun /* Execute a list of actions against 'skb'. */
1575*4882a593Smuzhiyun int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
1576*4882a593Smuzhiyun 			const struct sw_flow_actions *acts,
1577*4882a593Smuzhiyun 			struct sw_flow_key *key)
1578*4882a593Smuzhiyun {
1579*4882a593Smuzhiyun 	int err, level;
1580*4882a593Smuzhiyun 
1581*4882a593Smuzhiyun 	level = __this_cpu_inc_return(exec_actions_level);
1582*4882a593Smuzhiyun 	if (unlikely(level > OVS_RECURSION_LIMIT)) {
1583*4882a593Smuzhiyun 		net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
1584*4882a593Smuzhiyun 				     ovs_dp_name(dp));
1585*4882a593Smuzhiyun 		kfree_skb(skb);
1586*4882a593Smuzhiyun 		err = -ENETDOWN;
1587*4882a593Smuzhiyun 		goto out;
1588*4882a593Smuzhiyun 	}
1589*4882a593Smuzhiyun 
1590*4882a593Smuzhiyun 	OVS_CB(skb)->acts_origlen = acts->orig_len;
1591*4882a593Smuzhiyun 	err = do_execute_actions(dp, skb, key,
1592*4882a593Smuzhiyun 				 acts->actions, acts->actions_len);
1593*4882a593Smuzhiyun 
1594*4882a593Smuzhiyun 	if (level == 1)
1595*4882a593Smuzhiyun 		process_deferred_actions(dp);
1596*4882a593Smuzhiyun 
1597*4882a593Smuzhiyun out:
1598*4882a593Smuzhiyun 	__this_cpu_dec(exec_actions_level);
1599*4882a593Smuzhiyun 	return err;
1600*4882a593Smuzhiyun }
1601*4882a593Smuzhiyun 
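/* Editorial sketch of the recursion accounting above, shared with
 * clone_execute() via the per-CPU exec_actions_level counter:
 *
 *	level = __this_cpu_inc_return(exec_actions_level);
 *	if (level > OVS_RECURSION_LIMIT) {	(limit is 5)
 *		kfree_skb(skb);
 *		err = -ENETDOWN;
 *	} else {
 *		err = do_execute_actions(...);
 *		if (level == 1)			(top-level call only)
 *			process_deferred_actions(dp);
 *	}
 *	__this_cpu_dec(exec_actions_level);
 *
 * Only the outermost call drains the deferred-action FIFO, so work
 * deferred once clone_key() runs out of per-CPU key space is replayed
 * only after the whole action stack has unwound.
 */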
1602*4882a593Smuzhiyun int action_fifos_init(void)
1603*4882a593Smuzhiyun {
1604*4882a593Smuzhiyun 	action_fifos = alloc_percpu(struct action_fifo);
1605*4882a593Smuzhiyun 	if (!action_fifos)
1606*4882a593Smuzhiyun 		return -ENOMEM;
1607*4882a593Smuzhiyun 
1608*4882a593Smuzhiyun 	flow_keys = alloc_percpu(struct action_flow_keys);
1609*4882a593Smuzhiyun 	if (!flow_keys) {
1610*4882a593Smuzhiyun 		free_percpu(action_fifos);
1611*4882a593Smuzhiyun 		return -ENOMEM;
1612*4882a593Smuzhiyun 	}
1613*4882a593Smuzhiyun 
1614*4882a593Smuzhiyun 	return 0;
1615*4882a593Smuzhiyun }
1616*4882a593Smuzhiyun 
1617*4882a593Smuzhiyun void action_fifos_exit(void)
1618*4882a593Smuzhiyun {
1619*4882a593Smuzhiyun 	free_percpu(action_fifos);
1620*4882a593Smuzhiyun 	free_percpu(flow_keys);
1621*4882a593Smuzhiyun }
1622