xref: /OK3568_Linux_fs/kernel/net/openvswitch/datapath.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Copyright (c) 2007-2014 Nicira, Inc.
4*4882a593Smuzhiyun  */
5*4882a593Smuzhiyun 
6*4882a593Smuzhiyun #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7*4882a593Smuzhiyun 
8*4882a593Smuzhiyun #include <linux/init.h>
9*4882a593Smuzhiyun #include <linux/module.h>
10*4882a593Smuzhiyun #include <linux/if_arp.h>
11*4882a593Smuzhiyun #include <linux/if_vlan.h>
12*4882a593Smuzhiyun #include <linux/in.h>
13*4882a593Smuzhiyun #include <linux/ip.h>
14*4882a593Smuzhiyun #include <linux/jhash.h>
15*4882a593Smuzhiyun #include <linux/delay.h>
16*4882a593Smuzhiyun #include <linux/time.h>
17*4882a593Smuzhiyun #include <linux/etherdevice.h>
18*4882a593Smuzhiyun #include <linux/genetlink.h>
19*4882a593Smuzhiyun #include <linux/kernel.h>
20*4882a593Smuzhiyun #include <linux/kthread.h>
21*4882a593Smuzhiyun #include <linux/mutex.h>
22*4882a593Smuzhiyun #include <linux/percpu.h>
23*4882a593Smuzhiyun #include <linux/rcupdate.h>
24*4882a593Smuzhiyun #include <linux/tcp.h>
25*4882a593Smuzhiyun #include <linux/udp.h>
26*4882a593Smuzhiyun #include <linux/ethtool.h>
27*4882a593Smuzhiyun #include <linux/wait.h>
28*4882a593Smuzhiyun #include <asm/div64.h>
29*4882a593Smuzhiyun #include <linux/highmem.h>
30*4882a593Smuzhiyun #include <linux/netfilter_bridge.h>
31*4882a593Smuzhiyun #include <linux/netfilter_ipv4.h>
32*4882a593Smuzhiyun #include <linux/inetdevice.h>
33*4882a593Smuzhiyun #include <linux/list.h>
34*4882a593Smuzhiyun #include <linux/openvswitch.h>
35*4882a593Smuzhiyun #include <linux/rculist.h>
36*4882a593Smuzhiyun #include <linux/dmi.h>
37*4882a593Smuzhiyun #include <net/genetlink.h>
38*4882a593Smuzhiyun #include <net/net_namespace.h>
39*4882a593Smuzhiyun #include <net/netns/generic.h>
40*4882a593Smuzhiyun 
41*4882a593Smuzhiyun #include "datapath.h"
42*4882a593Smuzhiyun #include "flow.h"
43*4882a593Smuzhiyun #include "flow_table.h"
44*4882a593Smuzhiyun #include "flow_netlink.h"
45*4882a593Smuzhiyun #include "meter.h"
46*4882a593Smuzhiyun #include "vport-internal_dev.h"
47*4882a593Smuzhiyun #include "vport-netdev.h"
48*4882a593Smuzhiyun 
/* Per-network-namespace index; used with net_generic() to find this
 * module's private state for a given struct net. */
unsigned int ovs_net_id __read_mostly;

/* Generic netlink families; dp_packet_genl_family is defined below,
 * the others elsewhere in this file (static, so same translation unit). */
static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

/* Multicast groups on which userspace is notified of flow, datapath
 * and vport changes respectively. */
static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
	.name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
	.name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
	.name = OVS_VPORT_MCGROUP,
};
68*4882a593Smuzhiyun 
69*4882a593Smuzhiyun /* Check if need to build a reply message.
70*4882a593Smuzhiyun  * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
ovs_must_notify(struct genl_family * family,struct genl_info * info,unsigned int group)71*4882a593Smuzhiyun static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
72*4882a593Smuzhiyun 			    unsigned int group)
73*4882a593Smuzhiyun {
74*4882a593Smuzhiyun 	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
75*4882a593Smuzhiyun 	       genl_has_listeners(family, genl_info_net(info), group);
76*4882a593Smuzhiyun }
77*4882a593Smuzhiyun 
/* Send @skb as a notification to the family's listeners (and echo it
 * back to the requester when NLM_F_ECHO was set). */
static void ovs_notify(struct genl_family *family,
		       struct sk_buff *skb, struct genl_info *info)
{
	genl_notify(family, skb, info, 0, GFP_KERNEL);
}
83*4882a593Smuzhiyun 
/**
 * DOC: Locking:
 *
 * All writes — e.g. writes to device state (add/remove datapath or port,
 * set operations on vports, etc.) and writes to other state (flow table
 * modifications, setting miscellaneous datapath parameters, etc.) — are
 * protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
100*4882a593Smuzhiyun 
/* Global mutex serializing all datapath/vport/flow-table writes. */
static DEFINE_MUTEX(ovs_mutex);

/* Acquire ovs_mutex. */
void ovs_lock(void)
{
	mutex_lock(&ovs_mutex);
}

/* Release ovs_mutex. */
void ovs_unlock(void)
{
	mutex_unlock(&ovs_mutex);
}
112*4882a593Smuzhiyun 
#ifdef CONFIG_LOCKDEP
/* Report whether ovs_mutex is held, for lockdep-backed assertions.
 * Once lockdep has disabled itself (debug_locks == 0) its answer is no
 * longer trustworthy, so pretend the lock is held to avoid spurious
 * warnings. */
int lockdep_ovsl_is_held(void)
{
	return debug_locks ? lockdep_is_held(&ovs_mutex) : 1;
}
#endif
122*4882a593Smuzhiyun 
static struct vport *new_vport(const struct vport_parms *);

/* Upcall helpers; definitions appear later in this file. */
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
			     const struct sw_flow_key *,
			     const struct dp_upcall_info *,
			     uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
				  const struct sw_flow_key *,
				  const struct dp_upcall_info *,
				  uint32_t cutlen);

static void ovs_dp_masks_rebalance(struct work_struct *work);
134*4882a593Smuzhiyun 
135*4882a593Smuzhiyun /* Must be called with rcu_read_lock or ovs_mutex. */
ovs_dp_name(const struct datapath * dp)136*4882a593Smuzhiyun const char *ovs_dp_name(const struct datapath *dp)
137*4882a593Smuzhiyun {
138*4882a593Smuzhiyun 	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
139*4882a593Smuzhiyun 	return ovs_vport_name(vport);
140*4882a593Smuzhiyun }
141*4882a593Smuzhiyun 
get_dpifindex(const struct datapath * dp)142*4882a593Smuzhiyun static int get_dpifindex(const struct datapath *dp)
143*4882a593Smuzhiyun {
144*4882a593Smuzhiyun 	struct vport *local;
145*4882a593Smuzhiyun 	int ifindex;
146*4882a593Smuzhiyun 
147*4882a593Smuzhiyun 	rcu_read_lock();
148*4882a593Smuzhiyun 
149*4882a593Smuzhiyun 	local = ovs_vport_rcu(dp, OVSP_LOCAL);
150*4882a593Smuzhiyun 	if (local)
151*4882a593Smuzhiyun 		ifindex = local->dev->ifindex;
152*4882a593Smuzhiyun 	else
153*4882a593Smuzhiyun 		ifindex = 0;
154*4882a593Smuzhiyun 
155*4882a593Smuzhiyun 	rcu_read_unlock();
156*4882a593Smuzhiyun 
157*4882a593Smuzhiyun 	return ifindex;
158*4882a593Smuzhiyun }
159*4882a593Smuzhiyun 
/* RCU callback that finishes destroying a datapath after a grace
 * period, once no readers can still reference it. */
static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	ovs_flow_tbl_destroy(&dp->table);
	free_percpu(dp->stats_percpu);
	kfree(dp->ports);
	ovs_meters_exit(dp);
	kfree(dp);
}
170*4882a593Smuzhiyun 
/* Map a port number to its bucket in the datapath's vport hash table.
 * The mask assumes DP_VPORT_HASH_BUCKETS is a power of two. */
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
					    u16 port_no)
{
	u16 bucket = port_no & (DP_VPORT_HASH_BUCKETS - 1);

	return &dp->ports[bucket];
}
176*4882a593Smuzhiyun 
/* Look up the vport with number @port_no in @dp, or NULL if none.
 * Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
	struct vport *vport;
	struct hlist_head *head;

	head = vport_hash_bucket(dp, port_no);
	/* lockdep_ovsl_is_held() tells the RCU list walk that holding
	 * ovs_mutex is a valid alternative to rcu_read_lock here. */
	hlist_for_each_entry_rcu(vport, head, dp_hash_node,
				 lockdep_ovsl_is_held()) {
		if (vport->port_no == port_no)
			return vport;
	}
	return NULL;
}
191*4882a593Smuzhiyun 
192*4882a593Smuzhiyun /* Called with ovs_mutex. */
new_vport(const struct vport_parms * parms)193*4882a593Smuzhiyun static struct vport *new_vport(const struct vport_parms *parms)
194*4882a593Smuzhiyun {
195*4882a593Smuzhiyun 	struct vport *vport;
196*4882a593Smuzhiyun 
197*4882a593Smuzhiyun 	vport = ovs_vport_add(parms);
198*4882a593Smuzhiyun 	if (!IS_ERR(vport)) {
199*4882a593Smuzhiyun 		struct datapath *dp = parms->dp;
200*4882a593Smuzhiyun 		struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
201*4882a593Smuzhiyun 
202*4882a593Smuzhiyun 		hlist_add_head_rcu(&vport->dp_hash_node, head);
203*4882a593Smuzhiyun 	}
204*4882a593Smuzhiyun 	return vport;
205*4882a593Smuzhiyun }
206*4882a593Smuzhiyun 
/* Unlink @p from its datapath and destroy it.  Caller must hold
 * ovs_mutex; hlist_del_rcu() means concurrent RCU readers may still
 * observe the vport until a grace period elapses. */
void ovs_dp_detach_port(struct vport *p)
{
	ASSERT_OVSL();

	/* First drop references to device. */
	hlist_del_rcu(&p->dp_hash_node);

	/* Then destroy it. */
	ovs_vport_del(p);
}
217*4882a593Smuzhiyun 
/* Per-packet fast path: look up @key in the datapath's flow table and
 * execute the matching flow's actions, or — on a miss — send the packet
 * to userspace as an OVS_PACKET_CMD_MISS upcall.  Consumes @skb either
 * way.  Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
	const struct vport *p = OVS_CB(skb)->input_vport;
	struct datapath *dp = p->dp;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct dp_stats_percpu *stats;
	u64 *stats_counter;
	u32 n_mask_hit;
	u32 n_cache_hit;
	int error;

	stats = this_cpu_ptr(dp->stats_percpu);

	/* Look up flow. */
	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
					 &n_mask_hit, &n_cache_hit);
	if (unlikely(!flow)) {
		struct dp_upcall_info upcall;

		memset(&upcall, 0, sizeof(upcall));
		upcall.cmd = OVS_PACKET_CMD_MISS;
		upcall.portid = ovs_vport_find_upcall_portid(p, skb);
		upcall.mru = OVS_CB(skb)->mru;
		error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
		switch (error) {
		case 0:
		case -EAGAIN:
		case -ERESTARTSYS:
		case -EINTR:
			/* Success or a transient/interrupted failure:
			 * treat the skb as consumed, not dropped. */
			consume_skb(skb);
			break;
		default:
			/* Real failure: count as a drop. */
			kfree_skb(skb);
			break;
		}
		stats_counter = &stats->n_missed;
		goto out;
	}

	ovs_flow_stats_update(flow, key->tp.flags, skb);
	sf_acts = rcu_dereference(flow->sf_acts);
	error = ovs_execute_actions(dp, skb, sf_acts, key);
	if (unlikely(error))
		net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
				    ovs_dp_name(dp), error);

	stats_counter = &stats->n_hit;

out:
	/* Update datapath statistics. */
	u64_stats_update_begin(&stats->syncp);
	(*stats_counter)++;
	stats->n_mask_hit += n_mask_hit;
	stats->n_cache_hit += n_cache_hit;
	u64_stats_update_end(&stats->syncp);
}
276*4882a593Smuzhiyun 
/* Queue @skb to the userspace listener named by upcall_info->portid,
 * segmenting GSO packets first.  Any failure (including a missing
 * listener) bumps the per-cpu n_lost counter.  Returns 0 on success or
 * a negative errno.  Does not free @skb. */
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct sw_flow_key *key,
		  const struct dp_upcall_info *upcall_info,
		  uint32_t cutlen)
{
	int err;

	if (upcall_info->portid == 0)
		err = -ENOTCONN;	/* nobody is listening */
	else if (skb_is_gso(skb))
		err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
	else
		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);

	if (err) {
		struct dp_stats_percpu *stats = this_cpu_ptr(dp->stats_percpu);

		u64_stats_update_begin(&stats->syncp);
		stats->n_lost++;
		u64_stats_update_end(&stats->syncp);
	}

	return err;
}
308*4882a593Smuzhiyun 
/* Segment a GSO skb and queue each segment to userspace individually.
 * The original @skb stays owned by the caller; every segment created
 * here is released before returning.  Returns the error of the first
 * segment that failed to queue, or 0 if all were queued. */
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
			     const struct sw_flow_key *key,
			     const struct dp_upcall_info *upcall_info,
			     uint32_t cutlen)
{
	unsigned int gso_type = skb_shinfo(skb)->gso_type;
	struct sw_flow_key later_key;
	struct sk_buff *segs, *nskb;
	int err;

	BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	if (gso_type & SKB_GSO_UDP) {
		/* The initial flow key extracted by ovs_flow_key_extract()
		 * in this case is for a first fragment, so we need to
		 * properly mark later fragments.
		 */
		later_key = *key;
		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
	}

	/* Queue all of the segments. */
	skb_list_walk_safe(segs, skb, nskb) {
		if (gso_type & SKB_GSO_UDP && skb != segs)
			key = &later_key;

		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
		if (err)
			break;

	}

	/* Free all of the segments.  If queueing failed partway, every
	 * segment (whether already queued or not) is dropped with
	 * kfree_skb(); on full success they are consumed normally.
	 * Note: err is always assigned because the segment list is
	 * non-empty, so the first loop runs at least once. */
	skb_list_walk_safe(segs, skb, nskb) {
		if (err)
			kfree_skb(skb);
		else
			consume_skb(skb);
	}
	return err;
}
355*4882a593Smuzhiyun 
/* Compute the netlink message size needed for an upcall so the skb can
 * be allocated in one shot.
 * @hdrlen: number of packet-data bytes that will be attached.
 * @actions_attrlen: serialized size of the actions attribute payload. */
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
			      unsigned int hdrlen, int actions_attrlen)
{
	/* Fixed part: genl header plus attributes present in every
	 * upcall message. */
	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
		+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
		+ nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
		+ nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */

	/* OVS_PACKET_ATTR_USERDATA */
	if (upcall_info->userdata)
		size += NLA_ALIGN(upcall_info->userdata->nla_len);

	/* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
	if (upcall_info->egress_tun_info)
		size += nla_total_size(ovs_tun_key_attr_size());

	/* OVS_PACKET_ATTR_ACTIONS */
	if (upcall_info->actions_len)
		size += nla_total_size(actions_attrlen);

	/* OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru)
		size += nla_total_size(sizeof(upcall_info->mru));

	return size;
}
383*4882a593Smuzhiyun 
pad_packet(struct datapath * dp,struct sk_buff * skb)384*4882a593Smuzhiyun static void pad_packet(struct datapath *dp, struct sk_buff *skb)
385*4882a593Smuzhiyun {
386*4882a593Smuzhiyun 	if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
387*4882a593Smuzhiyun 		size_t plen = NLA_ALIGN(skb->len) - skb->len;
388*4882a593Smuzhiyun 
389*4882a593Smuzhiyun 		if (plen > 0)
390*4882a593Smuzhiyun 			skb_put_zero(skb, plen);
391*4882a593Smuzhiyun 	}
392*4882a593Smuzhiyun }
393*4882a593Smuzhiyun 
/* Build an OVS_PACKET netlink message carrying @skb (and its flow key,
 * userdata, tunnel info, actions, MRU and hash) and unicast it to the
 * listener identified by upcall_info->portid.  Does not free @skb; any
 * clone made here and any unsent reply skb are released locally.
 * Returns 0 on success or a negative errno. */
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
				  const struct sw_flow_key *key,
				  const struct dp_upcall_info *upcall_info,
				  uint32_t cutlen)
{
	struct ovs_header *upcall;
	struct sk_buff *nskb = NULL;
	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
	struct nlattr *nla;
	size_t len;
	unsigned int hlen;
	int err, dp_ifindex;
	u64 hash;

	dp_ifindex = get_dpifindex(dp);
	if (!dp_ifindex)
		return -ENODEV;

	if (skb_vlan_tag_present(skb)) {
		/* Push the hw-accel VLAN tag into the packet data on a
		 * clone, so the caller's skb is left unmodified. */
		nskb = skb_clone(skb, GFP_ATOMIC);
		if (!nskb)
			return -ENOMEM;

		nskb = __vlan_hwaccel_push_inside(nskb);
		if (!nskb)
			return -ENOMEM;

		skb = nskb;
	}

	/* An nlattr length field is 16 bits; a bigger packet can't be
	 * represented as a single attribute. */
	if (nla_attr_size(skb->len) > USHRT_MAX) {
		err = -EFBIG;
		goto out;
	}

	/* Complete checksum if needed */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_csum_hwoffload_help(skb, 0)))
		goto out;

	/* Older versions of OVS user space enforce alignment of the last
	 * Netlink attribute to NLA_ALIGNTO which would require extensive
	 * padding logic. Only perform zerocopy if padding is not required.
	 */
	if (dp->user_features & OVS_DP_F_UNALIGNED)
		hlen = skb_zerocopy_headlen(skb);
	else
		hlen = skb->len;

	len = upcall_msg_size(upcall_info, hlen - cutlen,
			      OVS_CB(skb)->acts_origlen);
	user_skb = genlmsg_new(len, GFP_ATOMIC);
	if (!user_skb) {
		err = -ENOMEM;
		goto out;
	}

	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
			     0, upcall_info->cmd);
	if (!upcall) {
		err = -EINVAL;
		goto out;
	}
	upcall->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
	if (err)
		goto out;

	if (upcall_info->userdata)
		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
			  nla_len(upcall_info->userdata),
			  nla_data(upcall_info->userdata));

	if (upcall_info->egress_tun_info) {
		nla = nla_nest_start_noflag(user_skb,
					    OVS_PACKET_ATTR_EGRESS_TUN_KEY);
		if (!nla) {
			err = -EMSGSIZE;
			goto out;
		}
		err = ovs_nla_put_tunnel_info(user_skb,
					      upcall_info->egress_tun_info);
		if (err)
			goto out;

		nla_nest_end(user_skb, nla);
	}

	if (upcall_info->actions_len) {
		nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
		if (!nla) {
			err = -EMSGSIZE;
			goto out;
		}
		err = ovs_nla_put_actions(upcall_info->actions,
					  upcall_info->actions_len,
					  user_skb);
		if (!err)
			nla_nest_end(user_skb, nla);
		else
			nla_nest_cancel(user_skb, nla);
	}

	/* Add OVS_PACKET_ATTR_MRU */
	if (upcall_info->mru &&
	    nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
	if (cutlen > 0 &&
	    nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Add OVS_PACKET_ATTR_HASH: raw hash value in the low 32 bits,
	 * with flag bits recording how the hash was obtained. */
	hash = skb_get_hash_raw(skb);
	if (skb->sw_hash)
		hash |= OVS_PACKET_HASH_SW_BIT;

	if (skb->l4_hash)
		hash |= OVS_PACKET_HASH_L4_BIT;

	if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
		err = -ENOBUFS;
		goto out;
	}

	/* Only reserve room for attribute header, packet data is added
	 * in skb_zerocopy() */
	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
		err = -ENOBUFS;
		goto out;
	}
	nla->nla_len = nla_attr_size(skb->len - cutlen);

	err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
	if (err)
		goto out;

	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
	pad_packet(dp, user_skb);

	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
	/* Ownership of user_skb passed to genlmsg_unicast(); make sure
	 * the cleanup below does not free it again. */
	user_skb = NULL;
out:
	if (err)
		skb_tx_error(skb);
	/* consume_skb(NULL) is a no-op, so both calls are safe on every
	 * path through this function. */
	consume_skb(user_skb);
	consume_skb(nskb);

	return err;
}
552*4882a593Smuzhiyun 
/* OVS_PACKET_CMD_EXECUTE handler: userspace supplies a raw packet, a
 * flow key and a list of actions; reconstruct the packet as an skb,
 * build a temporary sw_flow, and execute the actions as if the packet
 * had been received on the key's input port.  Returns 0 or a negative
 * errno. */
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
	struct ovs_header *ovs_header = info->userhdr;
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct sw_flow_actions *acts;
	struct sk_buff *packet;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct datapath *dp;
	struct vport *input_vport;
	u16 mru = 0;
	u64 hash;
	int len;
	int err;
	/* PROBE requests are feature probes from userspace: suppress
	 * error logging for them. */
	bool log = !a[OVS_PACKET_ATTR_PROBE];

	err = -EINVAL;
	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
	    !a[OVS_PACKET_ATTR_ACTIONS])
		goto err;

	/* Copy the packet payload out of the netlink attribute into a
	 * freshly allocated skb. */
	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
	err = -ENOMEM;
	if (!packet)
		goto err;
	skb_reserve(packet, NET_IP_ALIGN);

	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

	/* Set packet's mru */
	if (a[OVS_PACKET_ATTR_MRU]) {
		mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
		packet->ignore_df = 1;
	}
	OVS_CB(packet)->mru = mru;

	if (a[OVS_PACKET_ATTR_HASH]) {
		hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);

		/* Low 32 bits are the hash value; the SW/L4 bits record
		 * how it was computed (mirrors queue_userspace_packet's
		 * encoding). */
		__skb_set_hash(packet, hash & 0xFFFFFFFFULL,
			       !!(hash & OVS_PACKET_HASH_SW_BIT),
			       !!(hash & OVS_PACKET_HASH_L4_BIT));
	}

	/* Build an sw_flow for sending this packet. */
	flow = ovs_flow_alloc();
	err = PTR_ERR(flow);
	if (IS_ERR(flow))
		goto err_kfree_skb;

	err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
					     packet, &flow->key, log);
	if (err)
		goto err_flow_free;

	err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
				   &flow->key, &acts, log);
	if (err)
		goto err_flow_free;

	rcu_assign_pointer(flow->sf_acts, acts);
	packet->priority = flow->key.phy.priority;
	packet->mark = flow->key.phy.skb_mark;

	rcu_read_lock();
	dp = get_dp_rcu(net, ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto err_unlock;

	/* Fall back to the local port if the requested in_port does not
	 * exist. */
	input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
	if (!input_vport)
		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

	if (!input_vport)
		goto err_unlock;

	packet->dev = input_vport->dev;
	OVS_CB(packet)->input_vport = input_vport;
	sf_acts = rcu_dereference(flow->sf_acts);

	/* Run the actions with bottom halves disabled. */
	local_bh_disable();
	err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
	local_bh_enable();
	rcu_read_unlock();

	ovs_flow_free(flow, false);
	return err;

err_unlock:
	rcu_read_unlock();
err_flow_free:
	ovs_flow_free(flow, false);
err_kfree_skb:
	kfree_skb(packet);
err:
	return err;
}
653*4882a593Smuzhiyun 
/* Netlink attribute policy for OVS_PACKET_* requests. */
static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN }, /* minimum length: one Ethernet header */
	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
	[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
	[OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};
662*4882a593Smuzhiyun 
/* Command table for the OVS_PACKET family: only CMD_EXECUTE exists. */
static const struct genl_small_ops dp_packet_genl_ops[] = {
	{ .cmd = OVS_PACKET_CMD_EXECUTE,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_packet_cmd_execute
	}
};
670*4882a593Smuzhiyun 
/* Generic netlink family definition for OVS_PACKET requests.
 * netnsok: usable from any network namespace; parallel_ops: handlers
 * may run concurrently (they take ovs_lock/RCU themselves as needed).
 */
static struct genl_family dp_packet_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_PACKET_FAMILY,
	.version = OVS_PACKET_VERSION,
	.maxattr = OVS_PACKET_ATTR_MAX,
	.policy = packet_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_packet_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
	.module = THIS_MODULE,
};
683*4882a593Smuzhiyun 
get_dp_stats(const struct datapath * dp,struct ovs_dp_stats * stats,struct ovs_dp_megaflow_stats * mega_stats)684*4882a593Smuzhiyun static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
685*4882a593Smuzhiyun 			 struct ovs_dp_megaflow_stats *mega_stats)
686*4882a593Smuzhiyun {
687*4882a593Smuzhiyun 	int i;
688*4882a593Smuzhiyun 
689*4882a593Smuzhiyun 	memset(mega_stats, 0, sizeof(*mega_stats));
690*4882a593Smuzhiyun 
691*4882a593Smuzhiyun 	stats->n_flows = ovs_flow_tbl_count(&dp->table);
692*4882a593Smuzhiyun 	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
693*4882a593Smuzhiyun 
694*4882a593Smuzhiyun 	stats->n_hit = stats->n_missed = stats->n_lost = 0;
695*4882a593Smuzhiyun 
696*4882a593Smuzhiyun 	for_each_possible_cpu(i) {
697*4882a593Smuzhiyun 		const struct dp_stats_percpu *percpu_stats;
698*4882a593Smuzhiyun 		struct dp_stats_percpu local_stats;
699*4882a593Smuzhiyun 		unsigned int start;
700*4882a593Smuzhiyun 
701*4882a593Smuzhiyun 		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
702*4882a593Smuzhiyun 
703*4882a593Smuzhiyun 		do {
704*4882a593Smuzhiyun 			start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
705*4882a593Smuzhiyun 			local_stats = *percpu_stats;
706*4882a593Smuzhiyun 		} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
707*4882a593Smuzhiyun 
708*4882a593Smuzhiyun 		stats->n_hit += local_stats.n_hit;
709*4882a593Smuzhiyun 		stats->n_missed += local_stats.n_missed;
710*4882a593Smuzhiyun 		stats->n_lost += local_stats.n_lost;
711*4882a593Smuzhiyun 		mega_stats->n_mask_hit += local_stats.n_mask_hit;
712*4882a593Smuzhiyun 		mega_stats->n_cache_hit += local_stats.n_cache_hit;
713*4882a593Smuzhiyun 	}
714*4882a593Smuzhiyun }
715*4882a593Smuzhiyun 
should_fill_key(const struct sw_flow_id * sfid,uint32_t ufid_flags)716*4882a593Smuzhiyun static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
717*4882a593Smuzhiyun {
718*4882a593Smuzhiyun 	return ovs_identifier_is_ufid(sfid) &&
719*4882a593Smuzhiyun 	       !(ufid_flags & OVS_UFID_F_OMIT_KEY);
720*4882a593Smuzhiyun }
721*4882a593Smuzhiyun 
should_fill_mask(uint32_t ufid_flags)722*4882a593Smuzhiyun static bool should_fill_mask(uint32_t ufid_flags)
723*4882a593Smuzhiyun {
724*4882a593Smuzhiyun 	return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
725*4882a593Smuzhiyun }
726*4882a593Smuzhiyun 
should_fill_actions(uint32_t ufid_flags)727*4882a593Smuzhiyun static bool should_fill_actions(uint32_t ufid_flags)
728*4882a593Smuzhiyun {
729*4882a593Smuzhiyun 	return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
730*4882a593Smuzhiyun }
731*4882a593Smuzhiyun 
ovs_flow_cmd_msg_size(const struct sw_flow_actions * acts,const struct sw_flow_id * sfid,uint32_t ufid_flags)732*4882a593Smuzhiyun static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
733*4882a593Smuzhiyun 				    const struct sw_flow_id *sfid,
734*4882a593Smuzhiyun 				    uint32_t ufid_flags)
735*4882a593Smuzhiyun {
736*4882a593Smuzhiyun 	size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
737*4882a593Smuzhiyun 
738*4882a593Smuzhiyun 	/* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
739*4882a593Smuzhiyun 	 * see ovs_nla_put_identifier()
740*4882a593Smuzhiyun 	 */
741*4882a593Smuzhiyun 	if (sfid && ovs_identifier_is_ufid(sfid))
742*4882a593Smuzhiyun 		len += nla_total_size(sfid->ufid_len);
743*4882a593Smuzhiyun 	else
744*4882a593Smuzhiyun 		len += nla_total_size(ovs_key_attr_size());
745*4882a593Smuzhiyun 
746*4882a593Smuzhiyun 	/* OVS_FLOW_ATTR_KEY */
747*4882a593Smuzhiyun 	if (!sfid || should_fill_key(sfid, ufid_flags))
748*4882a593Smuzhiyun 		len += nla_total_size(ovs_key_attr_size());
749*4882a593Smuzhiyun 
750*4882a593Smuzhiyun 	/* OVS_FLOW_ATTR_MASK */
751*4882a593Smuzhiyun 	if (should_fill_mask(ufid_flags))
752*4882a593Smuzhiyun 		len += nla_total_size(ovs_key_attr_size());
753*4882a593Smuzhiyun 
754*4882a593Smuzhiyun 	/* OVS_FLOW_ATTR_ACTIONS */
755*4882a593Smuzhiyun 	if (should_fill_actions(ufid_flags))
756*4882a593Smuzhiyun 		len += nla_total_size(acts->orig_len);
757*4882a593Smuzhiyun 
758*4882a593Smuzhiyun 	return len
759*4882a593Smuzhiyun 		+ nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
760*4882a593Smuzhiyun 		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
761*4882a593Smuzhiyun 		+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
762*4882a593Smuzhiyun }
763*4882a593Smuzhiyun 
/* Append OVS_FLOW_ATTR_USED, OVS_FLOW_ATTR_STATS and
 * OVS_FLOW_ATTR_TCP_FLAGS for 'flow' to 'skb'.  Attributes whose value
 * is zero are omitted.  Returns 0, or -EMSGSIZE when 'skb' runs out of
 * room.  Called with ovs_mutex or RCU read lock.
 */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
				   struct sk_buff *skb)
{
	struct ovs_flow_stats stats;
	__be16 tcp_flags;
	unsigned long used;

	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

	/* Last-used time, only when the flow has been used at all. */
	if (used &&
	    nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
			      OVS_FLOW_ATTR_PAD))
		return -EMSGSIZE;

	if (stats.n_packets &&
	    nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
			  sizeof(struct ovs_flow_stats), &stats,
			  OVS_FLOW_ATTR_PAD))
		return -EMSGSIZE;

	/* Only the low 8 bits of the accumulated TCP flags are exported. */
	if ((u8)ntohs(tcp_flags) &&
	     nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
		return -EMSGSIZE;

	return 0;
}
791*4882a593Smuzhiyun 
/* Append the OVS_FLOW_ATTR_ACTIONS nest for 'flow' to 'skb'.
 * 'skb_orig_len' is skb->len before this flow's message was started:
 * non-zero means 'skb' is a dump buffer already holding earlier flows.
 * Called with ovs_mutex or RCU read lock.
 */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
				     struct sk_buff *skb, int skb_orig_len)
{
	struct nlattr *start;
	int err;

	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
	 * this is the first flow to be dumped into 'skb'.  This is unusual for
	 * Netlink but individual action lists can be longer than
	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
	 * The userspace caller can always fetch the actions separately if it
	 * really wants them.  (Most userspace callers in fact don't care.)
	 *
	 * This can only fail for dump operations because the skb is always
	 * properly sized for single flows.
	 */
	start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
	if (start) {
		const struct sw_flow_actions *sf_acts;

		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
		err = ovs_nla_put_actions(sf_acts->actions,
					  sf_acts->actions_len, skb);

		if (!err)
			nla_nest_end(skb, start);
		else {
			/* During a dump, propagate the error so this flow
			 * is retried in a fresh skb; otherwise drop the
			 * half-written nest and report success without
			 * actions.
			 */
			if (skb_orig_len)
				return err;

			nla_nest_cancel(skb, start);
		}
	} else if (skb_orig_len) {
		return -EMSGSIZE;
	}

	return 0;
}
831*4882a593Smuzhiyun 
/* Fill one complete flow message into 'skb': genetlink/ovs header plus
 * the identifier, key, mask, stats and actions attributes selected by
 * 'ufid_flags'.  Returns 0, or a negative errno after cancelling the
 * partially written message.  Called with ovs_mutex or RCU read lock.
 */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
				  struct sk_buff *skb, u32 portid,
				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
	const int skb_orig_len = skb->len;
	struct ovs_header *ovs_header;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = dp_ifindex;

	/* UFID when present, otherwise the unmasked flow key. */
	err = ovs_nla_put_identifier(flow, skb);
	if (err)
		goto error;

	if (should_fill_key(&flow->id, ufid_flags)) {
		err = ovs_nla_put_masked_key(flow, skb);
		if (err)
			goto error;
	}

	if (should_fill_mask(ufid_flags)) {
		err = ovs_nla_put_mask(flow, skb);
		if (err)
			goto error;
	}

	err = ovs_flow_cmd_fill_stats(flow, skb);
	if (err)
		goto error;

	if (should_fill_actions(ufid_flags)) {
		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
		if (err)
			goto error;
	}

	genlmsg_end(skb, ovs_header);
	return 0;

error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}
881*4882a593Smuzhiyun 
882*4882a593Smuzhiyun /* May not be called with RCU read lock. */
ovs_flow_cmd_alloc_info(const struct sw_flow_actions * acts,const struct sw_flow_id * sfid,struct genl_info * info,bool always,uint32_t ufid_flags)883*4882a593Smuzhiyun static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
884*4882a593Smuzhiyun 					       const struct sw_flow_id *sfid,
885*4882a593Smuzhiyun 					       struct genl_info *info,
886*4882a593Smuzhiyun 					       bool always,
887*4882a593Smuzhiyun 					       uint32_t ufid_flags)
888*4882a593Smuzhiyun {
889*4882a593Smuzhiyun 	struct sk_buff *skb;
890*4882a593Smuzhiyun 	size_t len;
891*4882a593Smuzhiyun 
892*4882a593Smuzhiyun 	if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
893*4882a593Smuzhiyun 		return NULL;
894*4882a593Smuzhiyun 
895*4882a593Smuzhiyun 	len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
896*4882a593Smuzhiyun 	skb = genlmsg_new(len, GFP_KERNEL);
897*4882a593Smuzhiyun 	if (!skb)
898*4882a593Smuzhiyun 		return ERR_PTR(-ENOMEM);
899*4882a593Smuzhiyun 
900*4882a593Smuzhiyun 	return skb;
901*4882a593Smuzhiyun }
902*4882a593Smuzhiyun 
903*4882a593Smuzhiyun /* Called with ovs_mutex. */
ovs_flow_cmd_build_info(const struct sw_flow * flow,int dp_ifindex,struct genl_info * info,u8 cmd,bool always,u32 ufid_flags)904*4882a593Smuzhiyun static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
905*4882a593Smuzhiyun 					       int dp_ifindex,
906*4882a593Smuzhiyun 					       struct genl_info *info, u8 cmd,
907*4882a593Smuzhiyun 					       bool always, u32 ufid_flags)
908*4882a593Smuzhiyun {
909*4882a593Smuzhiyun 	struct sk_buff *skb;
910*4882a593Smuzhiyun 	int retval;
911*4882a593Smuzhiyun 
912*4882a593Smuzhiyun 	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
913*4882a593Smuzhiyun 				      &flow->id, info, always, ufid_flags);
914*4882a593Smuzhiyun 	if (IS_ERR_OR_NULL(skb))
915*4882a593Smuzhiyun 		return skb;
916*4882a593Smuzhiyun 
917*4882a593Smuzhiyun 	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
918*4882a593Smuzhiyun 					info->snd_portid, info->snd_seq, 0,
919*4882a593Smuzhiyun 					cmd, ufid_flags);
920*4882a593Smuzhiyun 	if (WARN_ON_ONCE(retval < 0)) {
921*4882a593Smuzhiyun 		kfree_skb(skb);
922*4882a593Smuzhiyun 		skb = ERR_PTR(retval);
923*4882a593Smuzhiyun 	}
924*4882a593Smuzhiyun 	return skb;
925*4882a593Smuzhiyun }
926*4882a593Smuzhiyun 
/* OVS_FLOW_CMD_NEW handler: install a new flow in the datapath, or -
 * when neither NLM_F_CREATE nor NLM_F_EXCL is given - replace the
 * actions of an existing flow with the same identifier.  Returns 0 on
 * success (after notifying listeners, if any) or a negative errno.
 */
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow *flow = NULL, *new_flow;
	struct sw_flow_mask mask;
	struct sk_buff *reply;
	struct datapath *dp;
	struct sw_flow_actions *acts;
	struct sw_flow_match match;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int error;
	bool log = !a[OVS_FLOW_ATTR_PROBE];

	/* Must have key and actions. */
	error = -EINVAL;
	if (!a[OVS_FLOW_ATTR_KEY]) {
		OVS_NLERR(log, "Flow key attr not present in new flow.");
		goto error;
	}
	if (!a[OVS_FLOW_ATTR_ACTIONS]) {
		OVS_NLERR(log, "Flow actions attr not present in new flow.");
		goto error;
	}

	/* Most of the time we need to allocate a new flow, do it before
	 * locking.
	 */
	new_flow = ovs_flow_alloc();
	if (IS_ERR(new_flow)) {
		error = PTR_ERR(new_flow);
		goto error;
	}

	/* Extract key. */
	ovs_match_init(&match, &new_flow->key, false, &mask);
	error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
				  a[OVS_FLOW_ATTR_MASK], log);
	if (error)
		goto err_kfree_flow;

	/* Extract flow identifier. */
	error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
				       &new_flow->key, log);
	if (error)
		goto err_kfree_flow;

	/* unmasked key is needed to match when ufid is not used. */
	if (ovs_identifier_is_key(&new_flow->id))
		match.key = new_flow->id.unmasked_key;

	ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);

	/* Validate actions. */
	error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
				     &new_flow->key, &acts, log);
	if (error) {
		OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
		goto err_kfree_flow;
	}

	/* Allocate the reply before taking ovs_lock; it may legitimately
	 * be NULL when no one listens for notifications.
	 */
	reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
					ufid_flags);
	if (IS_ERR(reply)) {
		error = PTR_ERR(reply);
		goto err_kfree_acts;
	}

	ovs_lock();
	dp = get_dp(net, ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}

	/* Check if this is a duplicate flow */
	if (ovs_identifier_is_ufid(&new_flow->id))
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
	if (!flow)
		flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
	if (likely(!flow)) {
		rcu_assign_pointer(new_flow->sf_acts, acts);

		/* Put flow in bucket. */
		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
		if (unlikely(error)) {
			/* 'acts' is now referenced from new_flow; clear the
			 * local so the error path does not free it twice
			 * (err_kfree_flow frees new_flow and what it owns).
			 */
			acts = NULL;
			goto err_unlock_ovs;
		}

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(new_flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
			BUG_ON(error < 0);
		}
		ovs_unlock();
	} else {
		struct sw_flow_actions *old_acts;

		/* Bail out if we're not allowed to modify an existing flow.
		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
		 * because Generic Netlink treats the latter as a dump
		 * request.  We also accept NLM_F_EXCL in case that bug ever
		 * gets fixed.
		 */
		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
							 | NLM_F_EXCL))) {
			error = -EEXIST;
			goto err_unlock_ovs;
		}
		/* The flow identifier has to be the same for flow updates.
		 * Look for any overlapping flow.
		 */
		if (unlikely(!ovs_flow_cmp(flow, &match))) {
			if (ovs_identifier_is_key(&flow->id))
				flow = ovs_flow_tbl_lookup_exact(&dp->table,
								 &match);
			else /* UFID matches but key is different */
				flow = NULL;
			if (!flow) {
				error = -ENOENT;
				goto err_unlock_ovs;
			}
		}
		/* Update actions. */
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
			BUG_ON(error < 0);
		}
		ovs_unlock();

		/* Free replaced actions after a grace period; drop the
		 * pre-allocated flow that was not inserted.
		 */
		ovs_nla_free_flow_actions_rcu(old_acts);
		ovs_flow_free(new_flow, false);
	}

	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);
	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	ovs_nla_free_flow_actions(acts);
err_kfree_flow:
	ovs_flow_free(new_flow, false);
error:
	return error;
}
1089*4882a593Smuzhiyun 
1090*4882a593Smuzhiyun /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
1091*4882a593Smuzhiyun static noinline_for_stack
get_flow_actions(struct net * net,const struct nlattr * a,const struct sw_flow_key * key,const struct sw_flow_mask * mask,bool log)1092*4882a593Smuzhiyun struct sw_flow_actions *get_flow_actions(struct net *net,
1093*4882a593Smuzhiyun 					 const struct nlattr *a,
1094*4882a593Smuzhiyun 					 const struct sw_flow_key *key,
1095*4882a593Smuzhiyun 					 const struct sw_flow_mask *mask,
1096*4882a593Smuzhiyun 					 bool log)
1097*4882a593Smuzhiyun {
1098*4882a593Smuzhiyun 	struct sw_flow_actions *acts;
1099*4882a593Smuzhiyun 	struct sw_flow_key masked_key;
1100*4882a593Smuzhiyun 	int error;
1101*4882a593Smuzhiyun 
1102*4882a593Smuzhiyun 	ovs_flow_mask_key(&masked_key, key, true, mask);
1103*4882a593Smuzhiyun 	error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
1104*4882a593Smuzhiyun 	if (error) {
1105*4882a593Smuzhiyun 		OVS_NLERR(log,
1106*4882a593Smuzhiyun 			  "Actions may not be safe on all matching packets");
1107*4882a593Smuzhiyun 		return ERR_PTR(error);
1108*4882a593Smuzhiyun 	}
1109*4882a593Smuzhiyun 
1110*4882a593Smuzhiyun 	return acts;
1111*4882a593Smuzhiyun }
1112*4882a593Smuzhiyun 
/* Factor out match-init and action-copy to avoid a
 * "Wframe-larger-than=1024" warning.  Because the mask is only used to
 * derive the actions, doing this in a separate function saves stack
 * space.
 *
 * If neither key nor action attributes are present, 0 is returned and
 * the caller will not use the match.  If an actions attribute is
 * present, the actions are validated and stored in *acts.  Before
 * returning, match->mask is reset so that the caller never sees a
 * match object with a dangling reference to the on-stack mask.
 */
static noinline_for_stack int
ovs_nla_init_match_and_action(struct net *net,
			      struct sw_flow_match *match,
			      struct sw_flow_key *key,
			      struct nlattr **a,
			      struct sw_flow_actions **acts,
			      bool log)
{
	struct sw_flow_mask mask;
	int error = 0;

	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(match, key, true, &mask);
		error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
					  a[OVS_FLOW_ATTR_MASK], log);
		if (error)
			goto error;
	}

	if (a[OVS_FLOW_ATTR_ACTIONS]) {
		/* Actions cannot be validated without a key. */
		if (!a[OVS_FLOW_ATTR_KEY]) {
			OVS_NLERR(log,
				  "Flow key attribute not present in set flow.");
			error = -EINVAL;
			goto error;
		}

		*acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
					 &mask, log);
		if (IS_ERR(*acts)) {
			error = PTR_ERR(*acts);
			goto error;
		}
	}

	/* On success, error is 0. */
error:
	match->mask = NULL;
	return error;
}
1166*4882a593Smuzhiyun 
/* OVS_FLOW_CMD_SET handler: update the actions and/or clear the stats
 * of an existing flow, identified either by its UFID or by its exact
 * key.  Returns 0 (after notifying listeners, if any) or a negative
 * errno.
 */
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sw_flow *flow;
	struct sk_buff *reply = NULL;
	struct datapath *dp;
	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
	struct sw_flow_match match;
	struct sw_flow_id sfid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int error = 0;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	/* The target flow must be identified by UFID or by key. */
	ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
	if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
		OVS_NLERR(log,
			  "Flow set message rejected, Key attribute missing.");
		return -EINVAL;
	}

	error = ovs_nla_init_match_and_action(net, &match, &key, a,
					      &acts, log);
	if (error)
		goto error;

	if (acts) {
		/* Can allocate before locking if have acts. */
		reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
						ufid_flags);
		if (IS_ERR(reply)) {
			error = PTR_ERR(reply);
			goto err_kfree_acts;
		}
	}

	ovs_lock();
	dp = get_dp(net, ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}
	/* Check that the flow exists. */
	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		error = -ENOENT;
		goto err_unlock_ovs;
	}

	/* Update actions, if present. */
	if (likely(acts)) {
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_SET,
						       ufid_flags);
			BUG_ON(error < 0);
		}
	} else {
		/* Could not alloc without acts before locking. */
		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
						info, OVS_FLOW_CMD_SET, false,
						ufid_flags);

		if (IS_ERR(reply)) {
			error = PTR_ERR(reply);
			goto err_unlock_ovs;
		}
	}

	/* Clear stats. */
	if (a[OVS_FLOW_ATTR_CLEAR])
		ovs_flow_stats_clear(flow);
	ovs_unlock();

	if (reply)
		ovs_notify(&dp_flow_genl_family, reply, info);
	/* Free the replaced actions after an RCU grace period. */
	if (old_acts)
		ovs_nla_free_flow_actions_rcu(old_acts);

	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	ovs_nla_free_flow_actions(acts);
error:
	return error;
}
1268*4882a593Smuzhiyun 
/* OVS_FLOW_CMD_GET handler: look up a single flow by UFID or exact key
 * and unicast it back to the requester.  Returns 0 or a negative errno.
 */
static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct net *net = sock_net(skb->sk);
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct sw_flow_match match;
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int err = 0;
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	/* The flow must be identified by UFID or by key. */
	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(&match, &key, true, NULL);
		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
					log);
	} else if (!ufid_present) {
		OVS_NLERR(log,
			  "Flow get message rejected, Key attribute missing.");
		err = -EINVAL;
	}
	if (err)
		return err;

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		err = -ENODEV;
		goto unlock;
	}

	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (!flow) {
		err = -ENOENT;
		goto unlock;
	}

	/* 'always' is true: a GET reply is built even without listeners. */
	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
					OVS_FLOW_CMD_GET, true, ufid_flags);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		goto unlock;
	}

	ovs_unlock();
	return genlmsg_reply(reply, info);
unlock:
	ovs_unlock();
	return err;
}
1327*4882a593Smuzhiyun 
/* Handler for OVS_FLOW_CMD_DEL: remove one flow, selected by unique flow ID
 * (UFID) or exact key, or flush the entire flow table when neither selector
 * is supplied.  A best-effort OVS_FLOW_CMD_DEL notification describing the
 * removed flow is multicast to listeners.
 *
 * Returns 0 on success (including when the notification could not be
 * built), or a negative errno for bad input, unknown datapath (-ENODEV) or
 * missing flow (-ENOENT).
 */
static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct net *net = sock_net(skb->sk);
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow = NULL;
	struct datapath *dp;
	struct sw_flow_match match;
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int err;
	/* OVS_FLOW_ATTR_PROBE marks feature probes; suppress log noise then. */
	bool log = !a[OVS_FLOW_ATTR_PROBE];
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(&match, &key, true, NULL);
		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
					NULL, log);
		if (unlikely(err))
			return err;
	}

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		err = -ENODEV;
		goto unlock;
	}

	/* No key and no UFID means "delete every flow in this datapath". */
	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
		err = ovs_flow_tbl_flush(&dp->table);
		goto unlock;
	}

	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		err = -ENOENT;
		goto unlock;
	}

	/* Unlink first, then drop ovs_mutex.  The flow object itself stays
	 * valid until the deferred free at the end, so the notification can
	 * still be built from it without holding the lock.
	 */
	ovs_flow_tbl_remove(&dp->table, flow);
	ovs_unlock();

	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
					&flow->id, info, false, ufid_flags);
	if (likely(reply)) {
		if (!IS_ERR(reply)) {
			rcu_read_lock();	/*To keep RCU checker happy. */
			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
						     reply, info->snd_portid,
						     info->snd_seq, 0,
						     OVS_FLOW_CMD_DEL,
						     ufid_flags);
			rcu_read_unlock();
			if (WARN_ON_ONCE(err < 0)) {
				kfree_skb(reply);
				goto out_free;
			}

			ovs_notify(&dp_flow_genl_family, reply, info);
		} else {
			/* Could not size the reply; report the error to the
			 * multicast group instead of sending a notification.
			 */
			netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
					PTR_ERR(reply));
		}
	}

out_free:
	/* Deferred free: concurrent RCU readers may still hold the flow. */
	ovs_flow_free(flow, true);
	return 0;
unlock:
	ovs_unlock();
	return err;
}
1407*4882a593Smuzhiyun 
ovs_flow_cmd_dump(struct sk_buff * skb,struct netlink_callback * cb)1408*4882a593Smuzhiyun static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1409*4882a593Smuzhiyun {
1410*4882a593Smuzhiyun 	struct nlattr *a[__OVS_FLOW_ATTR_MAX];
1411*4882a593Smuzhiyun 	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1412*4882a593Smuzhiyun 	struct table_instance *ti;
1413*4882a593Smuzhiyun 	struct datapath *dp;
1414*4882a593Smuzhiyun 	u32 ufid_flags;
1415*4882a593Smuzhiyun 	int err;
1416*4882a593Smuzhiyun 
1417*4882a593Smuzhiyun 	err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
1418*4882a593Smuzhiyun 				       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
1419*4882a593Smuzhiyun 	if (err)
1420*4882a593Smuzhiyun 		return err;
1421*4882a593Smuzhiyun 	ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
1422*4882a593Smuzhiyun 
1423*4882a593Smuzhiyun 	rcu_read_lock();
1424*4882a593Smuzhiyun 	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
1425*4882a593Smuzhiyun 	if (!dp) {
1426*4882a593Smuzhiyun 		rcu_read_unlock();
1427*4882a593Smuzhiyun 		return -ENODEV;
1428*4882a593Smuzhiyun 	}
1429*4882a593Smuzhiyun 
1430*4882a593Smuzhiyun 	ti = rcu_dereference(dp->table.ti);
1431*4882a593Smuzhiyun 	for (;;) {
1432*4882a593Smuzhiyun 		struct sw_flow *flow;
1433*4882a593Smuzhiyun 		u32 bucket, obj;
1434*4882a593Smuzhiyun 
1435*4882a593Smuzhiyun 		bucket = cb->args[0];
1436*4882a593Smuzhiyun 		obj = cb->args[1];
1437*4882a593Smuzhiyun 		flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
1438*4882a593Smuzhiyun 		if (!flow)
1439*4882a593Smuzhiyun 			break;
1440*4882a593Smuzhiyun 
1441*4882a593Smuzhiyun 		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
1442*4882a593Smuzhiyun 					   NETLINK_CB(cb->skb).portid,
1443*4882a593Smuzhiyun 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1444*4882a593Smuzhiyun 					   OVS_FLOW_CMD_GET, ufid_flags) < 0)
1445*4882a593Smuzhiyun 			break;
1446*4882a593Smuzhiyun 
1447*4882a593Smuzhiyun 		cb->args[0] = bucket;
1448*4882a593Smuzhiyun 		cb->args[1] = obj;
1449*4882a593Smuzhiyun 	}
1450*4882a593Smuzhiyun 	rcu_read_unlock();
1451*4882a593Smuzhiyun 	return skb->len;
1452*4882a593Smuzhiyun }
1453*4882a593Smuzhiyun 
/* Netlink attribute validation policy for OVS_FLOW_* requests.
 * NOTE(review): for OVS_FLOW_ATTR_UFID, .len with NLA_UNSPEC is presumably
 * the minimum payload length (1 byte) — confirm against nla_policy docs.
 */
static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
	[OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
	[OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
	[OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};
1463*4882a593Smuzhiyun 
/* Command table for the flow genetlink family: NEW/DEL/SET require
 * CAP_NET_ADMIN; GET (including its dump form) is open to all users.
 */
static const struct genl_small_ops dp_flow_genl_ops[] = {
	{ .cmd = OVS_FLOW_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_flow_cmd_new
	},
	{ .cmd = OVS_FLOW_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_flow_cmd_del
	},
	{ .cmd = OVS_FLOW_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .doit = ovs_flow_cmd_get,
	  .dumpit = ovs_flow_cmd_dump
	},
	{ .cmd = OVS_FLOW_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_flow_cmd_set,
	},
};
1487*4882a593Smuzhiyun 
/* Generic netlink family definition for flow management (OVS_FLOW_CMD_*).
 * parallel_ops allows concurrent handler execution; per-netns operation is
 * enabled via netnsok.
 */
static struct genl_family dp_flow_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_FLOW_FAMILY,
	.version = OVS_FLOW_VERSION,
	.maxattr = OVS_FLOW_ATTR_MAX,
	.policy = flow_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_flow_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
	.mcgrps = &ovs_dp_flow_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};
1502*4882a593Smuzhiyun 
ovs_dp_cmd_msg_size(void)1503*4882a593Smuzhiyun static size_t ovs_dp_cmd_msg_size(void)
1504*4882a593Smuzhiyun {
1505*4882a593Smuzhiyun 	size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1506*4882a593Smuzhiyun 
1507*4882a593Smuzhiyun 	msgsize += nla_total_size(IFNAMSIZ);
1508*4882a593Smuzhiyun 	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1509*4882a593Smuzhiyun 	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
1510*4882a593Smuzhiyun 	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1511*4882a593Smuzhiyun 	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
1512*4882a593Smuzhiyun 
1513*4882a593Smuzhiyun 	return msgsize;
1514*4882a593Smuzhiyun }
1515*4882a593Smuzhiyun 
/* Called with ovs_mutex.
 *
 * Serialize one datapath into @skb as a genetlink message: name, datapath
 * and megaflow statistics, user features and the masks-cache size.  On any
 * attribute overflow the partially built message is cancelled and
 * -EMSGSIZE is returned; on success the message is finalized and 0 is
 * returned.
 */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
				u32 portid, u32 seq, u32 flags, u8 cmd)
{
	struct ovs_header *ovs_header;
	struct ovs_dp_stats dp_stats;
	struct ovs_dp_megaflow_stats dp_megaflow_stats;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
				 flags, cmd);
	if (!ovs_header)
		goto error;

	ovs_header->dp_ifindex = get_dpifindex(dp);

	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
	if (err)
		goto nla_put_failure;

	/* Both stats structs are filled in one pass over the datapath. */
	get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
	if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
			  &dp_stats, OVS_DP_ATTR_PAD))
		goto nla_put_failure;

	if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
			  sizeof(struct ovs_dp_megaflow_stats),
			  &dp_megaflow_stats, OVS_DP_ATTR_PAD))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
			ovs_flow_tbl_masks_cache_size(&dp->table)))
		goto nla_put_failure;

	genlmsg_end(skb, ovs_header);
	return 0;

nla_put_failure:
	/* Roll back everything emitted since genlmsg_put(). */
	genlmsg_cancel(skb, ovs_header);
error:
	return -EMSGSIZE;
}
1561*4882a593Smuzhiyun 
ovs_dp_cmd_alloc_info(void)1562*4882a593Smuzhiyun static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1563*4882a593Smuzhiyun {
1564*4882a593Smuzhiyun 	return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1565*4882a593Smuzhiyun }
1566*4882a593Smuzhiyun 
/* Called with rcu_read_lock or ovs_mutex.
 *
 * Resolve the datapath a request refers to: by name when OVS_DP_ATTR_NAME
 * is present (the name must belong to the datapath's local port), otherwise
 * by the dp_ifindex carried in the ovs_header.  Returns ERR_PTR(-ENODEV)
 * when no datapath matches.
 */
static struct datapath *lookup_datapath(struct net *net,
					const struct ovs_header *ovs_header,
					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
	struct datapath *dp;

	if (!a[OVS_DP_ATTR_NAME])
		dp = get_dp(net, ovs_header->dp_ifindex);
	else {
		struct vport *vport;

		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
		/* Only the OVSP_LOCAL (internal) port names the datapath. */
		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
	}
	return dp ? dp : ERR_PTR(-ENODEV);
}
1584*4882a593Smuzhiyun 
ovs_dp_reset_user_features(struct sk_buff * skb,struct genl_info * info)1585*4882a593Smuzhiyun static void ovs_dp_reset_user_features(struct sk_buff *skb,
1586*4882a593Smuzhiyun 				       struct genl_info *info)
1587*4882a593Smuzhiyun {
1588*4882a593Smuzhiyun 	struct datapath *dp;
1589*4882a593Smuzhiyun 
1590*4882a593Smuzhiyun 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
1591*4882a593Smuzhiyun 			     info->attrs);
1592*4882a593Smuzhiyun 	if (IS_ERR(dp))
1593*4882a593Smuzhiyun 		return;
1594*4882a593Smuzhiyun 
1595*4882a593Smuzhiyun 	pr_warn("%s: Dropping previously announced user features\n",
1596*4882a593Smuzhiyun 		ovs_dp_name(dp));
1597*4882a593Smuzhiyun 	dp->user_features = 0;
1598*4882a593Smuzhiyun }
1599*4882a593Smuzhiyun 
/* Static key toggled by ovs_dp_change() according to the datapath's
 * OVS_DP_F_TC_RECIRC_SHARING user feature; default off.
 */
DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
1601*4882a593Smuzhiyun 
/* Apply OVS_DP_ATTR_USER_FEATURES and OVS_DP_ATTR_MASKS_CACHE_SIZE from a
 * NEW/SET request to @dp.  Called with ovs_mutex held (from
 * ovs_dp_cmd_new() and ovs_dp_cmd_set()).
 *
 * Returns 0 on success, -EOPNOTSUPP for unknown or unsupported feature
 * bits, or the error from resizing the masks cache.  When no
 * OVS_DP_ATTR_USER_FEATURES attribute is present, the features are reset
 * to 0.
 */
static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
	u32 user_features = 0;

	if (a[OVS_DP_ATTR_USER_FEATURES]) {
		user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);

		/* Reject any bit outside the known feature set. */
		if (user_features & ~(OVS_DP_F_VPORT_PIDS |
				      OVS_DP_F_UNALIGNED |
				      OVS_DP_F_TC_RECIRC_SHARING))
			return -EOPNOTSUPP;

#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
		/* Recirc sharing needs the TC skb extension support. */
		if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
			return -EOPNOTSUPP;
#endif
	}

	if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
		int err;
		u32 cache_size;

		cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
		err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
		if (err)
			return err;
	}

	dp->user_features = user_features;

	/* NOTE(review): the static key is global while the feature flag is
	 * per-datapath; disabling it here affects all datapaths — confirm
	 * this matches the intended upstream semantics.
	 */
	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
		static_branch_enable(&tc_recirc_sharing_support);
	else
		static_branch_disable(&tc_recirc_sharing_support);

	return 0;
}
1639*4882a593Smuzhiyun 
ovs_dp_stats_init(struct datapath * dp)1640*4882a593Smuzhiyun static int ovs_dp_stats_init(struct datapath *dp)
1641*4882a593Smuzhiyun {
1642*4882a593Smuzhiyun 	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1643*4882a593Smuzhiyun 	if (!dp->stats_percpu)
1644*4882a593Smuzhiyun 		return -ENOMEM;
1645*4882a593Smuzhiyun 
1646*4882a593Smuzhiyun 	return 0;
1647*4882a593Smuzhiyun }
1648*4882a593Smuzhiyun 
ovs_dp_vport_init(struct datapath * dp)1649*4882a593Smuzhiyun static int ovs_dp_vport_init(struct datapath *dp)
1650*4882a593Smuzhiyun {
1651*4882a593Smuzhiyun 	int i;
1652*4882a593Smuzhiyun 
1653*4882a593Smuzhiyun 	dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
1654*4882a593Smuzhiyun 				  sizeof(struct hlist_head),
1655*4882a593Smuzhiyun 				  GFP_KERNEL);
1656*4882a593Smuzhiyun 	if (!dp->ports)
1657*4882a593Smuzhiyun 		return -ENOMEM;
1658*4882a593Smuzhiyun 
1659*4882a593Smuzhiyun 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1660*4882a593Smuzhiyun 		INIT_HLIST_HEAD(&dp->ports[i]);
1661*4882a593Smuzhiyun 
1662*4882a593Smuzhiyun 	return 0;
1663*4882a593Smuzhiyun }
1664*4882a593Smuzhiyun 
/* Handler for OVS_DP_CMD_NEW: create a datapath named by OVS_DP_ATTR_NAME,
 * with its internal (OVSP_LOCAL) port and upcall portids from
 * OVS_DP_ATTR_UPCALL_PID.  On success the datapath is linked into the
 * per-netns list and an OVS_DP_CMD_NEW notification is multicast.
 *
 * All allocations happen before taking ovs_mutex; the goto ladder below
 * unwinds them in strict reverse order, so the label order is load-bearing.
 *
 * Returns 0 on success or a negative errno (-EINVAL for missing mandatory
 * attributes, -ENOMEM on allocation failure, -EEXIST when the name is
 * already taken, or errors from ovs_dp_change()/new_vport()).
 */
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	struct ovs_net *ovs_net;
	int err;

	err = -EINVAL;
	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
		goto err;

	/* Pre-size the reply so the fill below cannot fail with -EMSGSIZE. */
	reply = ovs_dp_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
		goto err_destroy_reply;

	ovs_dp_set_net(dp, sock_net(skb->sk));

	/* Allocate table. */
	err = ovs_flow_tbl_init(&dp->table);
	if (err)
		goto err_destroy_dp;

	err = ovs_dp_stats_init(dp);
	if (err)
		goto err_destroy_table;

	err = ovs_dp_vport_init(dp);
	if (err)
		goto err_destroy_stats;

	err = ovs_meters_init(dp);
	if (err)
		goto err_destroy_ports;

	/* Set up our datapath device. */
	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
	parms.type = OVS_VPORT_TYPE_INTERNAL;
	parms.options = NULL;
	parms.dp = dp;
	parms.port_no = OVSP_LOCAL;
	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];

	/* So far only local changes have been made, now need the lock. */
	ovs_lock();

	err = ovs_dp_change(dp, a);
	if (err)
		goto err_unlock_and_destroy_meters;

	vport = new_vport(&parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		/* Normalize "device busy" to "datapath already exists". */
		if (err == -EBUSY)
			err = -EEXIST;

		if (err == -EEXIST) {
			/* An outdated user space instance that does not understand
			 * the concept of user_features has attempted to create a new
			 * datapath and is likely to reuse it. Drop all user features.
			 */
			if (info->genlhdr->version < OVS_DP_VER_FEATURES)
				ovs_dp_reset_user_features(skb, info);
		}

		goto err_unlock_and_destroy_meters;
	}

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_NEW);
	BUG_ON(err < 0);

	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);

	ovs_unlock();

	ovs_notify(&dp_datapath_genl_family, reply, info);
	return 0;

err_unlock_and_destroy_meters:
	ovs_unlock();
	ovs_meters_exit(dp);
err_destroy_ports:
	kfree(dp->ports);
err_destroy_stats:
	free_percpu(dp->stats_percpu);
err_destroy_table:
	ovs_flow_tbl_destroy(&dp->table);
err_destroy_dp:
	kfree(dp);
err_destroy_reply:
	kfree_skb(reply);
err:
	return err;
}
1768*4882a593Smuzhiyun 
/* Called with ovs_mutex. */
/* Tear down a datapath: detach every vport (the internal OVSP_LOCAL port
 * last), unlink the datapath from the per-netns list, flush all flows, and
 * schedule the remaining resources for RCU-deferred destruction.  The
 * ordering below is deliberate and must not be rearranged.
 */
static void __dp_destroy(struct datapath *dp)
{
	struct flow_table *table = &dp->table;
	int i;

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;
		struct hlist_node *n;

		/* _safe variant: ovs_dp_detach_port() unlinks the entry. */
		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
			if (vport->port_no != OVSP_LOCAL)
				ovs_dp_detach_port(vport);
	}

	list_del_rcu(&dp->list_node);

	/* OVSP_LOCAL is datapath internal port. We need to make sure that
	 * all ports in datapath are destroyed first before freeing datapath.
	 */
	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));

	/* Flush sw_flow in the tables. RCU cb only releases resource
	 * such as dp, ports and tables. That may avoid some issues
	 * such as RCU usage warning.
	 */
	table_instance_flow_flush(table, ovsl_dereference(table->ti),
				  ovsl_dereference(table->ufid_ti));

	/* RCU destroy the ports, meters and flow tables. */
	call_rcu(&dp->rcu, destroy_dp_rcu);
}
1801*4882a593Smuzhiyun 
ovs_dp_cmd_del(struct sk_buff * skb,struct genl_info * info)1802*4882a593Smuzhiyun static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1803*4882a593Smuzhiyun {
1804*4882a593Smuzhiyun 	struct sk_buff *reply;
1805*4882a593Smuzhiyun 	struct datapath *dp;
1806*4882a593Smuzhiyun 	int err;
1807*4882a593Smuzhiyun 
1808*4882a593Smuzhiyun 	reply = ovs_dp_cmd_alloc_info();
1809*4882a593Smuzhiyun 	if (!reply)
1810*4882a593Smuzhiyun 		return -ENOMEM;
1811*4882a593Smuzhiyun 
1812*4882a593Smuzhiyun 	ovs_lock();
1813*4882a593Smuzhiyun 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1814*4882a593Smuzhiyun 	err = PTR_ERR(dp);
1815*4882a593Smuzhiyun 	if (IS_ERR(dp))
1816*4882a593Smuzhiyun 		goto err_unlock_free;
1817*4882a593Smuzhiyun 
1818*4882a593Smuzhiyun 	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1819*4882a593Smuzhiyun 				   info->snd_seq, 0, OVS_DP_CMD_DEL);
1820*4882a593Smuzhiyun 	BUG_ON(err < 0);
1821*4882a593Smuzhiyun 
1822*4882a593Smuzhiyun 	__dp_destroy(dp);
1823*4882a593Smuzhiyun 	ovs_unlock();
1824*4882a593Smuzhiyun 
1825*4882a593Smuzhiyun 	ovs_notify(&dp_datapath_genl_family, reply, info);
1826*4882a593Smuzhiyun 
1827*4882a593Smuzhiyun 	return 0;
1828*4882a593Smuzhiyun 
1829*4882a593Smuzhiyun err_unlock_free:
1830*4882a593Smuzhiyun 	ovs_unlock();
1831*4882a593Smuzhiyun 	kfree_skb(reply);
1832*4882a593Smuzhiyun 	return err;
1833*4882a593Smuzhiyun }
1834*4882a593Smuzhiyun 
ovs_dp_cmd_set(struct sk_buff * skb,struct genl_info * info)1835*4882a593Smuzhiyun static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1836*4882a593Smuzhiyun {
1837*4882a593Smuzhiyun 	struct sk_buff *reply;
1838*4882a593Smuzhiyun 	struct datapath *dp;
1839*4882a593Smuzhiyun 	int err;
1840*4882a593Smuzhiyun 
1841*4882a593Smuzhiyun 	reply = ovs_dp_cmd_alloc_info();
1842*4882a593Smuzhiyun 	if (!reply)
1843*4882a593Smuzhiyun 		return -ENOMEM;
1844*4882a593Smuzhiyun 
1845*4882a593Smuzhiyun 	ovs_lock();
1846*4882a593Smuzhiyun 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1847*4882a593Smuzhiyun 	err = PTR_ERR(dp);
1848*4882a593Smuzhiyun 	if (IS_ERR(dp))
1849*4882a593Smuzhiyun 		goto err_unlock_free;
1850*4882a593Smuzhiyun 
1851*4882a593Smuzhiyun 	err = ovs_dp_change(dp, info->attrs);
1852*4882a593Smuzhiyun 	if (err)
1853*4882a593Smuzhiyun 		goto err_unlock_free;
1854*4882a593Smuzhiyun 
1855*4882a593Smuzhiyun 	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1856*4882a593Smuzhiyun 				   info->snd_seq, 0, OVS_DP_CMD_SET);
1857*4882a593Smuzhiyun 	BUG_ON(err < 0);
1858*4882a593Smuzhiyun 
1859*4882a593Smuzhiyun 	ovs_unlock();
1860*4882a593Smuzhiyun 	ovs_notify(&dp_datapath_genl_family, reply, info);
1861*4882a593Smuzhiyun 
1862*4882a593Smuzhiyun 	return 0;
1863*4882a593Smuzhiyun 
1864*4882a593Smuzhiyun err_unlock_free:
1865*4882a593Smuzhiyun 	ovs_unlock();
1866*4882a593Smuzhiyun 	kfree_skb(reply);
1867*4882a593Smuzhiyun 	return err;
1868*4882a593Smuzhiyun }
1869*4882a593Smuzhiyun 
ovs_dp_cmd_get(struct sk_buff * skb,struct genl_info * info)1870*4882a593Smuzhiyun static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1871*4882a593Smuzhiyun {
1872*4882a593Smuzhiyun 	struct sk_buff *reply;
1873*4882a593Smuzhiyun 	struct datapath *dp;
1874*4882a593Smuzhiyun 	int err;
1875*4882a593Smuzhiyun 
1876*4882a593Smuzhiyun 	reply = ovs_dp_cmd_alloc_info();
1877*4882a593Smuzhiyun 	if (!reply)
1878*4882a593Smuzhiyun 		return -ENOMEM;
1879*4882a593Smuzhiyun 
1880*4882a593Smuzhiyun 	ovs_lock();
1881*4882a593Smuzhiyun 	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1882*4882a593Smuzhiyun 	if (IS_ERR(dp)) {
1883*4882a593Smuzhiyun 		err = PTR_ERR(dp);
1884*4882a593Smuzhiyun 		goto err_unlock_free;
1885*4882a593Smuzhiyun 	}
1886*4882a593Smuzhiyun 	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1887*4882a593Smuzhiyun 				   info->snd_seq, 0, OVS_DP_CMD_GET);
1888*4882a593Smuzhiyun 	BUG_ON(err < 0);
1889*4882a593Smuzhiyun 	ovs_unlock();
1890*4882a593Smuzhiyun 
1891*4882a593Smuzhiyun 	return genlmsg_reply(reply, info);
1892*4882a593Smuzhiyun 
1893*4882a593Smuzhiyun err_unlock_free:
1894*4882a593Smuzhiyun 	ovs_unlock();
1895*4882a593Smuzhiyun 	kfree_skb(reply);
1896*4882a593Smuzhiyun 	return err;
1897*4882a593Smuzhiyun }
1898*4882a593Smuzhiyun 
ovs_dp_cmd_dump(struct sk_buff * skb,struct netlink_callback * cb)1899*4882a593Smuzhiyun static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1900*4882a593Smuzhiyun {
1901*4882a593Smuzhiyun 	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1902*4882a593Smuzhiyun 	struct datapath *dp;
1903*4882a593Smuzhiyun 	int skip = cb->args[0];
1904*4882a593Smuzhiyun 	int i = 0;
1905*4882a593Smuzhiyun 
1906*4882a593Smuzhiyun 	ovs_lock();
1907*4882a593Smuzhiyun 	list_for_each_entry(dp, &ovs_net->dps, list_node) {
1908*4882a593Smuzhiyun 		if (i >= skip &&
1909*4882a593Smuzhiyun 		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1910*4882a593Smuzhiyun 					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1911*4882a593Smuzhiyun 					 OVS_DP_CMD_GET) < 0)
1912*4882a593Smuzhiyun 			break;
1913*4882a593Smuzhiyun 		i++;
1914*4882a593Smuzhiyun 	}
1915*4882a593Smuzhiyun 	ovs_unlock();
1916*4882a593Smuzhiyun 
1917*4882a593Smuzhiyun 	cb->args[0] = i;
1918*4882a593Smuzhiyun 
1919*4882a593Smuzhiyun 	return skb->len;
1920*4882a593Smuzhiyun }
1921*4882a593Smuzhiyun 
/* Netlink attribute validation policy for OVS_DP_* requests.  The masks
 * cache size is range-limited so the per-CPU cache allocation in the flow
 * table cannot exceed a per-CPU chunk.
 */
static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
	[OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 0,
		PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
};
1929*4882a593Smuzhiyun 
/* Command table for the datapath genetlink family: NEW/DEL/SET require
 * CAP_NET_ADMIN; GET (including its dump form) is open to all users.
 */
static const struct genl_small_ops dp_datapath_genl_ops[] = {
	{ .cmd = OVS_DP_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_new
	},
	{ .cmd = OVS_DP_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_del
	},
	{ .cmd = OVS_DP_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .doit = ovs_dp_cmd_get,
	  .dumpit = ovs_dp_cmd_dump
	},
	{ .cmd = OVS_DP_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_dp_cmd_set,
	},
};
1953*4882a593Smuzhiyun 
/* The OVS_DATAPATH_FAMILY generic netlink family.
 * netnsok: usable from non-init network namespaces.
 * parallel_ops: handlers serialize themselves (ovs_lock()/RCU), so genl
 * need not take its global mutex around them.
 */
static struct genl_family dp_datapath_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_DATAPATH_FAMILY,
	.version = OVS_DATAPATH_VERSION,
	.maxattr = OVS_DP_ATTR_MAX,
	.policy = datapath_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_datapath_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
	.mcgrps = &ovs_dp_datapath_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};
1968*4882a593Smuzhiyun 
/* Called with ovs_mutex or RCU read lock.
 *
 * Append one vport netlink message to @skb: port number, type, name,
 * ifindex, peer-netns id (only when the vport's device lives in a netns
 * other than @net), stats, upcall portids and vport options.
 *
 * @gfp is forwarded to peernet2id_alloc(), which may allocate a new
 * netnsid (GFP_ATOMIC callers come from RCU context, see below).
 *
 * Returns 0 on success or -EMSGSIZE if the message does not fit; on any
 * failure the partially built message is cancelled.
 */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
				   struct net *net, u32 portid, u32 seq,
				   u32 flags, u8 cmd, gfp_t gfp)
{
	struct ovs_header *ovs_header;
	struct ovs_vport_stats vport_stats;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = get_dpifindex(vport->dp);

	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
	    nla_put_string(skb, OVS_VPORT_ATTR_NAME,
			   ovs_vport_name(vport)) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
		goto nla_put_failure;

	/* Report a netnsid only for cross-namespace vports. */
	if (!net_eq(net, dev_net(vport->dev))) {
		int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);

		if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
			goto nla_put_failure;
	}

	ovs_vport_get_stats(vport, &vport_stats);
	if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
			  sizeof(struct ovs_vport_stats), &vport_stats,
			  OVS_VPORT_ATTR_PAD))
		goto nla_put_failure;

	if (ovs_vport_get_upcall_portids(vport, skb))
		goto nla_put_failure;

	/* Only -EMSGSIZE from the options callback aborts the message;
	 * other option errors fall through and the message is still ended.
	 */
	err = ovs_vport_get_options(vport, skb);
	if (err == -EMSGSIZE)
		goto error;

	genlmsg_end(skb, ovs_header);
	return 0;

nla_put_failure:
	err = -EMSGSIZE;
error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}
2021*4882a593Smuzhiyun 
ovs_vport_cmd_alloc_info(void)2022*4882a593Smuzhiyun static struct sk_buff *ovs_vport_cmd_alloc_info(void)
2023*4882a593Smuzhiyun {
2024*4882a593Smuzhiyun 	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2025*4882a593Smuzhiyun }
2026*4882a593Smuzhiyun 
2027*4882a593Smuzhiyun /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
ovs_vport_cmd_build_info(struct vport * vport,struct net * net,u32 portid,u32 seq,u8 cmd)2028*4882a593Smuzhiyun struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
2029*4882a593Smuzhiyun 					 u32 portid, u32 seq, u8 cmd)
2030*4882a593Smuzhiyun {
2031*4882a593Smuzhiyun 	struct sk_buff *skb;
2032*4882a593Smuzhiyun 	int retval;
2033*4882a593Smuzhiyun 
2034*4882a593Smuzhiyun 	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2035*4882a593Smuzhiyun 	if (!skb)
2036*4882a593Smuzhiyun 		return ERR_PTR(-ENOMEM);
2037*4882a593Smuzhiyun 
2038*4882a593Smuzhiyun 	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
2039*4882a593Smuzhiyun 					 GFP_KERNEL);
2040*4882a593Smuzhiyun 	BUG_ON(retval < 0);
2041*4882a593Smuzhiyun 
2042*4882a593Smuzhiyun 	return skb;
2043*4882a593Smuzhiyun }
2044*4882a593Smuzhiyun 
/* Called with ovs_mutex or RCU read lock.
 *
 * Resolve the vport a request refers to, either by OVS_VPORT_ATTR_NAME
 * (in which case a non-zero dp_ifindex in @ovs_header must match the
 * vport's datapath) or by OVS_VPORT_ATTR_PORT_NO within the datapath
 * identified by @ovs_header->dp_ifindex.  Selecting by
 * OVS_VPORT_ATTR_IFINDEX is not supported.
 *
 * Returns the vport or an ERR_PTR: -EOPNOTSUPP (ifindex selector),
 * -ENODEV (no such vport/datapath), -EFBIG (port number out of range),
 * -EINVAL (no selector attribute given).
 */
static struct vport *lookup_vport(struct net *net,
				  const struct ovs_header *ovs_header,
				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
{
	struct datapath *dp;
	struct vport *vport;

	if (a[OVS_VPORT_ATTR_IFINDEX])
		return ERR_PTR(-EOPNOTSUPP);
	if (a[OVS_VPORT_ATTR_NAME]) {
		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
		if (!vport)
			return ERR_PTR(-ENODEV);
		if (ovs_header->dp_ifindex &&
		    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
			return ERR_PTR(-ENODEV);
		return vport;
	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

		if (port_no >= DP_MAX_PORTS)
			return ERR_PTR(-EFBIG);

		dp = get_dp(net, ovs_header->dp_ifindex);
		if (!dp)
			return ERR_PTR(-ENODEV);

		vport = ovs_vport_ovsl_rcu(dp, port_no);
		if (!vport)
			return ERR_PTR(-ENODEV);
		return vport;
	} else
		return ERR_PTR(-EINVAL);

}
2081*4882a593Smuzhiyun 
ovs_get_max_headroom(struct datapath * dp)2082*4882a593Smuzhiyun static unsigned int ovs_get_max_headroom(struct datapath *dp)
2083*4882a593Smuzhiyun {
2084*4882a593Smuzhiyun 	unsigned int dev_headroom, max_headroom = 0;
2085*4882a593Smuzhiyun 	struct net_device *dev;
2086*4882a593Smuzhiyun 	struct vport *vport;
2087*4882a593Smuzhiyun 	int i;
2088*4882a593Smuzhiyun 
2089*4882a593Smuzhiyun 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2090*4882a593Smuzhiyun 		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2091*4882a593Smuzhiyun 					 lockdep_ovsl_is_held()) {
2092*4882a593Smuzhiyun 			dev = vport->dev;
2093*4882a593Smuzhiyun 			dev_headroom = netdev_get_fwd_headroom(dev);
2094*4882a593Smuzhiyun 			if (dev_headroom > max_headroom)
2095*4882a593Smuzhiyun 				max_headroom = dev_headroom;
2096*4882a593Smuzhiyun 		}
2097*4882a593Smuzhiyun 	}
2098*4882a593Smuzhiyun 
2099*4882a593Smuzhiyun 	return max_headroom;
2100*4882a593Smuzhiyun }
2101*4882a593Smuzhiyun 
2102*4882a593Smuzhiyun /* Called with ovs_mutex */
ovs_update_headroom(struct datapath * dp,unsigned int new_headroom)2103*4882a593Smuzhiyun static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
2104*4882a593Smuzhiyun {
2105*4882a593Smuzhiyun 	struct vport *vport;
2106*4882a593Smuzhiyun 	int i;
2107*4882a593Smuzhiyun 
2108*4882a593Smuzhiyun 	dp->max_headroom = new_headroom;
2109*4882a593Smuzhiyun 	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2110*4882a593Smuzhiyun 		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2111*4882a593Smuzhiyun 					 lockdep_ovsl_is_held())
2112*4882a593Smuzhiyun 			netdev_set_rx_headroom(vport->dev, new_headroom);
2113*4882a593Smuzhiyun 	}
2114*4882a593Smuzhiyun }
2115*4882a593Smuzhiyun 
/* OVS_VPORT_CMD_NEW: attach a new vport to an existing datapath.
 *
 * Requires NAME, TYPE and UPCALL_PID attributes; IFINDEX is rejected.
 * A PORT_NO of 0 (or absent) auto-allocates the lowest free port number
 * starting from 1.  On success the reply is multicast to listeners.
 */
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct vport *vport;
	struct datapath *dp;
	unsigned int new_headroom;
	u32 port_no;
	int err;

	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
	    !a[OVS_VPORT_ATTR_UPCALL_PID])
		return -EINVAL;
	if (a[OVS_VPORT_ATTR_IFINDEX])
		return -EOPNOTSUPP;

	port_no = a[OVS_VPORT_ATTR_PORT_NO]
		? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
	if (port_no >= DP_MAX_PORTS)
		return -EFBIG;

	/* Allocate the reply before taking ovs_mutex so failure is cheap. */
	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
restart:
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto exit_unlock_free;

	if (port_no) {
		vport = ovs_vport_ovsl(dp, port_no);
		err = -EBUSY;
		if (vport)
			goto exit_unlock_free;
	} else {
		/* Auto-allocate: find the first unused port number >= 1. */
		for (port_no = 1; ; port_no++) {
			if (port_no >= DP_MAX_PORTS) {
				err = -EFBIG;
				goto exit_unlock_free;
			}
			vport = ovs_vport_ovsl(dp, port_no);
			if (!vport)
				break;
		}
	}

	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
	parms.dp = dp;
	parms.port_no = port_no;
	parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];

	vport = new_vport(&parms);
	err = PTR_ERR(vport);
	if (IS_ERR(vport)) {
		/* -EAGAIN: re-lookup the datapath and retry the whole
		 * attach under the still-held ovs_mutex.
		 */
		if (err == -EAGAIN)
			goto restart;
		goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_NEW, GFP_KERNEL);

	/* Adding a port may raise the datapath-wide forwarding headroom;
	 * otherwise just apply the current maximum to the new device.
	 */
	new_headroom = netdev_get_fwd_headroom(vport->dev);

	if (new_headroom > dp->max_headroom)
		ovs_update_headroom(dp, new_headroom);
	else
		netdev_set_rx_headroom(vport->dev, dp->max_headroom);

	BUG_ON(err < 0);
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
2204*4882a593Smuzhiyun 
/* OVS_VPORT_CMD_SET: update an existing vport's options and/or upcall
 * portids.  Changing the vport's type is rejected with -EINVAL.  On
 * success the new state is multicast to listeners.
 */
static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	/* The type is immutable; it may only be restated unchanged. */
	if (a[OVS_VPORT_ATTR_TYPE] &&
	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
		err = -EINVAL;
		goto exit_unlock_free;
	}

	if (a[OVS_VPORT_ATTR_OPTIONS]) {
		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
		if (err)
			goto exit_unlock_free;
	}


	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];

		err = ovs_vport_set_upcall_portids(vport, ids);
		if (err)
			goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_SET, GFP_KERNEL);
	BUG_ON(err < 0);

	ovs_unlock();
	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
2257*4882a593Smuzhiyun 
/* OVS_VPORT_CMD_DEL: detach a vport from its datapath.
 *
 * The local port (OVSP_LOCAL) cannot be removed.  The reply is filled
 * in before the port is detached, while the vport still exists.  If the
 * removed device was the one defining dp->max_headroom, the datapath
 * headroom is recomputed and shrunk.
 */
static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	bool update_headroom = false;
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	unsigned int new_headroom;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	if (vport->port_no == OVSP_LOCAL) {
		err = -EINVAL;
		goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_DEL, GFP_KERNEL);
	BUG_ON(err < 0);

	/* the vport deletion may trigger dp headroom update */
	dp = vport->dp;
	if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
		update_headroom = true;

	netdev_reset_rx_headroom(vport->dev);
	ovs_dp_detach_port(vport);

	if (update_headroom) {
		new_headroom = ovs_get_max_headroom(dp);

		if (new_headroom < dp->max_headroom)
			ovs_update_headroom(dp, new_headroom);
	}
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
2312*4882a593Smuzhiyun 
/* OVS_VPORT_CMD_GET (doit): unicast a single vport's description back to
 * the requester.  Runs under the RCU read lock rather than ovs_mutex, so
 * the fill must use GFP_ATOMIC.
 */
static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	rcu_read_lock();
	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;
	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
				      info->snd_portid, info->snd_seq, 0,
				      OVS_VPORT_CMD_GET, GFP_ATOMIC);
	BUG_ON(err < 0);
	rcu_read_unlock();

	return genlmsg_reply(reply, info);

exit_unlock_free:
	rcu_read_unlock();
	kfree_skb(reply);
	return err;
}
2343*4882a593Smuzhiyun 
/* OVS_VPORT_CMD_GET (dumpit): dump all vports of one datapath.
 *
 * Resume state across dump calls lives in cb->args: args[0] is the hash
 * bucket to continue from, args[1] the number of entries to skip within
 * that bucket.  Runs entirely under the RCU read lock (GFP_ATOMIC fill).
 */
static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;
	int bucket = cb->args[0], skip = cb->args[1];
	int i, j = 0;

	rcu_read_lock();
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}
	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;

		j = 0;
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
			if (j >= skip &&
			    ovs_vport_cmd_fill_info(vport, skb,
						    sock_net(skb->sk),
						    NETLINK_CB(cb->skb).portid,
						    cb->nlh->nlmsg_seq,
						    NLM_F_MULTI,
						    OVS_VPORT_CMD_GET,
						    GFP_ATOMIC) < 0)
				goto out;

			j++;
		}
		/* Only the first resumed bucket honours the skip count. */
		skip = 0;
	}
out:
	rcu_read_unlock();

	cb->args[0] = i;
	cb->args[1] = j;

	return skb->len;
}
2384*4882a593Smuzhiyun 
/* Periodic per-namespace work: rebalance the flow-mask usage of every
 * datapath's flow table, then re-arm itself.
 */
static void ovs_dp_masks_rebalance(struct work_struct *work)
{
	struct ovs_net *ovs_net = container_of(work, struct ovs_net,
					       masks_rebalance.work);
	struct datapath *dp;

	ovs_lock();

	list_for_each_entry(dp, &ovs_net->dps, list_node)
		ovs_flow_masks_rebalance(&dp->table);

	ovs_unlock();

	/* Self-rearming delayed work; cancelled in ovs_exit_net(). */
	schedule_delayed_work(&ovs_net->masks_rebalance,
			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
}
2401*4882a593Smuzhiyun 
/* Netlink attribute policy for OVS_VPORT_ATTR_* vport attributes. */
static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
	[OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
};
2412*4882a593Smuzhiyun 
/* Handlers for the OVS_VPORT_CMD_* commands; mutating commands require
 * CAP_NET_ADMIN in the vport's namespace, GET is open to all users.
 */
static const struct genl_small_ops dp_vport_genl_ops[] = {
	{ .cmd = OVS_VPORT_CMD_NEW,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_new
	},
	{ .cmd = OVS_VPORT_CMD_DEL,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_del
	},
	{ .cmd = OVS_VPORT_CMD_GET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = 0,		    /* OK for unprivileged users. */
	  .doit = ovs_vport_cmd_get,
	  .dumpit = ovs_vport_cmd_dump
	},
	{ .cmd = OVS_VPORT_CMD_SET,
	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .doit = ovs_vport_cmd_set,
	},
};
2436*4882a593Smuzhiyun 
/* The OVS_VPORT_FAMILY generic netlink family.  Non-static: also used by
 * ovs_vport_cmd_fill_info()/ovs_vport_cmd_build_info() and the notify
 * paths in other compilation units of this module.
 */
struct genl_family dp_vport_genl_family __ro_after_init = {
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_VPORT_FAMILY,
	.version = OVS_VPORT_VERSION,
	.maxattr = OVS_VPORT_ATTR_MAX,
	.policy = vport_policy,
	.netnsok = true,
	.parallel_ops = true,
	.small_ops = dp_vport_genl_ops,
	.n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
	.mcgrps = &ovs_dp_vport_multicast_group,
	.n_mcgrps = 1,
	.module = THIS_MODULE,
};
2451*4882a593Smuzhiyun 
/* Every generic netlink family this module registers; the array order is
 * the registration order used by dp_register_genl() and the unwind order
 * used by dp_unregister_genl().
 */
static struct genl_family * const dp_genl_families[] = {
	&dp_datapath_genl_family,
	&dp_vport_genl_family,
	&dp_flow_genl_family,
	&dp_packet_genl_family,
	&dp_meter_genl_family,
#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
	&dp_ct_limit_genl_family,
#endif
};
2462*4882a593Smuzhiyun 
dp_unregister_genl(int n_families)2463*4882a593Smuzhiyun static void dp_unregister_genl(int n_families)
2464*4882a593Smuzhiyun {
2465*4882a593Smuzhiyun 	int i;
2466*4882a593Smuzhiyun 
2467*4882a593Smuzhiyun 	for (i = 0; i < n_families; i++)
2468*4882a593Smuzhiyun 		genl_unregister_family(dp_genl_families[i]);
2469*4882a593Smuzhiyun }
2470*4882a593Smuzhiyun 
dp_register_genl(void)2471*4882a593Smuzhiyun static int __init dp_register_genl(void)
2472*4882a593Smuzhiyun {
2473*4882a593Smuzhiyun 	int err;
2474*4882a593Smuzhiyun 	int i;
2475*4882a593Smuzhiyun 
2476*4882a593Smuzhiyun 	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2477*4882a593Smuzhiyun 
2478*4882a593Smuzhiyun 		err = genl_register_family(dp_genl_families[i]);
2479*4882a593Smuzhiyun 		if (err)
2480*4882a593Smuzhiyun 			goto error;
2481*4882a593Smuzhiyun 	}
2482*4882a593Smuzhiyun 
2483*4882a593Smuzhiyun 	return 0;
2484*4882a593Smuzhiyun 
2485*4882a593Smuzhiyun error:
2486*4882a593Smuzhiyun 	dp_unregister_genl(i);
2487*4882a593Smuzhiyun 	return err;
2488*4882a593Smuzhiyun }
2489*4882a593Smuzhiyun 
/* Per-netns init: set up this namespace's datapath list, notify work and
 * mask-rebalance work, initialise conntrack support, and kick off the
 * first rebalance run.  ovs_ct_init() runs before the delayed work is
 * scheduled so a failure leaves nothing armed.
 */
static int __net_init ovs_init_net(struct net *net)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	int err;

	INIT_LIST_HEAD(&ovs_net->dps);
	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
	INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);

	err = ovs_ct_init(net);
	if (err)
		return err;

	schedule_delayed_work(&ovs_net->masks_rebalance,
			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
	return 0;
}
2507*4882a593Smuzhiyun 
list_vports_from_net(struct net * net,struct net * dnet,struct list_head * head)2508*4882a593Smuzhiyun static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2509*4882a593Smuzhiyun 					    struct list_head *head)
2510*4882a593Smuzhiyun {
2511*4882a593Smuzhiyun 	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2512*4882a593Smuzhiyun 	struct datapath *dp;
2513*4882a593Smuzhiyun 
2514*4882a593Smuzhiyun 	list_for_each_entry(dp, &ovs_net->dps, list_node) {
2515*4882a593Smuzhiyun 		int i;
2516*4882a593Smuzhiyun 
2517*4882a593Smuzhiyun 		for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2518*4882a593Smuzhiyun 			struct vport *vport;
2519*4882a593Smuzhiyun 
2520*4882a593Smuzhiyun 			hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2521*4882a593Smuzhiyun 				if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2522*4882a593Smuzhiyun 					continue;
2523*4882a593Smuzhiyun 
2524*4882a593Smuzhiyun 				if (dev_net(vport->dev) == dnet)
2525*4882a593Smuzhiyun 					list_add(&vport->detach_list, head);
2526*4882a593Smuzhiyun 			}
2527*4882a593Smuzhiyun 		}
2528*4882a593Smuzhiyun 	}
2529*4882a593Smuzhiyun }
2530*4882a593Smuzhiyun 
/* Per-netns exit for @dnet: tear down conntrack state, destroy every
 * datapath registered in @dnet, then walk all remaining namespaces
 * (under net_rwsem) collecting internal vports whose device lives in
 * @dnet and detach them too.  The works are cancelled only after
 * ovs_unlock(), since they themselves take ovs_lock().
 */
static void __net_exit ovs_exit_net(struct net *dnet)
{
	struct datapath *dp, *dp_next;
	struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
	struct vport *vport, *vport_next;
	struct net *net;
	LIST_HEAD(head);

	ovs_lock();

	ovs_ct_exit(dnet);

	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
		__dp_destroy(dp);

	down_read(&net_rwsem);
	for_each_net(net)
		list_vports_from_net(net, dnet, &head);
	up_read(&net_rwsem);

	/* Detach all vports from given namespace. */
	list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
		list_del(&vport->detach_list);
		ovs_dp_detach_port(vport);
	}

	ovs_unlock();

	cancel_delayed_work_sync(&ovs_net->masks_rebalance);
	cancel_work_sync(&ovs_net->dp_notify_work);
}
2562*4882a593Smuzhiyun 
/* Pernet operations: .size makes the core allocate one struct ovs_net
 * per namespace, retrievable with net_generic(net, ovs_net_id).
 */
static struct pernet_operations ovs_net_ops = {
	.init = ovs_init_net,
	.exit = ovs_exit_net,
	.id   = &ovs_net_id,
	.size = sizeof(struct ovs_net),
};
2569*4882a593Smuzhiyun 
/* Module init: bring up the openvswitch datapath subsystems in
 * dependency order (action fifos, internal-dev rtnl link, flow tables,
 * vport layer, pernet ops, netdev notifier, netdev vport type, genl
 * families).  Each failure unwinds exactly the steps already completed,
 * in reverse, via the goto chain.
 */
static int __init dp_init(void)
{
	int err;

	/* ovs_skb_cb must fit in the skb control buffer. */
	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
		     sizeof_field(struct sk_buff, cb));

	pr_info("Open vSwitch switching datapath\n");

	err = action_fifos_init();
	if (err)
		goto error;

	err = ovs_internal_dev_rtnl_link_register();
	if (err)
		goto error_action_fifos_exit;

	err = ovs_flow_init();
	if (err)
		goto error_unreg_rtnl_link;

	err = ovs_vport_init();
	if (err)
		goto error_flow_exit;

	err = register_pernet_device(&ovs_net_ops);
	if (err)
		goto error_vport_exit;

	err = register_netdevice_notifier(&ovs_dp_device_notifier);
	if (err)
		goto error_netns_exit;

	err = ovs_netdev_init();
	if (err)
		goto error_unreg_notifier;

	err = dp_register_genl();
	if (err < 0)
		goto error_unreg_netdev;

	return 0;

error_unreg_netdev:
	ovs_netdev_exit();
error_unreg_notifier:
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
	unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
	ovs_vport_exit();
error_flow_exit:
	ovs_flow_exit();
error_unreg_rtnl_link:
	ovs_internal_dev_rtnl_link_unregister();
error_action_fifos_exit:
	action_fifos_exit();
error:
	return err;
}
2630*4882a593Smuzhiyun 
/* Module exit: undo dp_init() in reverse order.  The rcu_barrier()
 * waits for all outstanding call_rcu() callbacks to finish —
 * presumably ones queued by the genl/notifier/pernet teardown above
 * that may still touch flow and vport state — before the flow and
 * vport layers destroy their caches. */
static void dp_cleanup(void)
{
	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
	ovs_netdev_exit();
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
	unregister_pernet_device(&ovs_net_ops);
	rcu_barrier();
	ovs_vport_exit();
	ovs_flow_exit();
	ovs_internal_dev_rtnl_link_unregister();
	action_fifos_exit();
}
2643*4882a593Smuzhiyun 
2644*4882a593Smuzhiyun module_init(dp_init);
2645*4882a593Smuzhiyun module_exit(dp_cleanup);
2646*4882a593Smuzhiyun 
2647*4882a593Smuzhiyun MODULE_DESCRIPTION("Open vSwitch switching datapath");
2648*4882a593Smuzhiyun MODULE_LICENSE("GPL");
2649*4882a593Smuzhiyun MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2650*4882a593Smuzhiyun MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2651*4882a593Smuzhiyun MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2652*4882a593Smuzhiyun MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
2653*4882a593Smuzhiyun MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
2654*4882a593Smuzhiyun MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);
2655