xref: /OK3568_Linux_fs/kernel/drivers/misc/sgi-xp/xpc_main.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
 * Copyright (c) 2004-2009 Silicon Graphics, Inc.  All Rights Reserved.
 */

/*
 * Cross Partition Communication (XPC) support - standard version.
 *
 *	XPC provides a message passing capability that crosses partition
 *	boundaries. This module is made up of two parts:
 *
 *	    partition	This part detects the presence/absence of other
 *			partitions. It provides a heartbeat and monitors
 *			the heartbeats of other partitions.
 *
 *	    channel	This part manages the channels and sends/receives
 *			messages across them to/from other partitions.
 *
 *	There are a couple of additional functions residing in XP, which
 *	provide an interface to XPC for its users.
 *
 *
 *	Caveats:
 *
 *	  . Currently on sn2, we have no way to determine which nasid an IRQ
 *	    came from. Thus, xpc_send_IRQ_sn2() does a remote amo write
 *	    followed by an IPI. The amo indicates where data is to be pulled
 *	    from, so after the IPI arrives, the remote partition checks the amo
 *	    word. The IPI can actually arrive before the amo however, so other
 *	    code must periodically check for this case. Also, remote amo
 *	    operations do not reliably time out. Thus we do a remote PIO read
 *	    solely to know whether the remote partition is down and whether we
 *	    should stop sending IPIs to it. This remote PIO read operation is
 *	    set up in a special nofault region so SAL knows to ignore (and
 *	    cleanup) any errors due to the remote amo write, PIO read, and/or
 *	    PIO write operations.
 *
 *	    If/when new hardware solves this IPI problem, we should abandon
 *	    the current approach.
 *
 */
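/*
 * Illustrative sketch (not part of this file): a kernel-side user of XPC
 * registers a channel through the interface in xp.h. The calls below are
 * paraphrased from memory of xp.h and are shown only to make the flow
 * concrete; see that header for the authoritative definitions.
 *
 *	static void
 *	my_channel_func(enum xp_retval reason, short partid, int ch_number,
 *			void *data, void *key)
 *	{
 *		if (reason == xpMsgReceived)
 *			xpc_received(partid, ch_number, data);
 *	}
 *
 *	ret = xpc_connect(ch_number, my_channel_func, NULL, payload_size,
 *			  nentries, kthreads_assigned_limit,
 *			  kthreads_idle_limit);
 *
 * Once connected, messages are sent with xpc_send()/xpc_send_notify() and
 * are delivered to my_channel_func() by the kthreads created in this file.
 */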

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/kdebug.h>
#include <linux/kthread.h>
#include "xpc.h"

#ifdef CONFIG_X86_64
#include <asm/traps.h>
#endif

/* define two XPC debug device structures to be used with dev_dbg() et al */

static struct device_driver xpc_dbg_name = {
	.name = "xpc"
};

static struct device xpc_part_dbg_subname = {
	.init_name = "",	/* set to "part" at xpc_init() time */
	.driver = &xpc_dbg_name
};

static struct device xpc_chan_dbg_subname = {
	.init_name = "",	/* set to "chan" at xpc_init() time */
	.driver = &xpc_dbg_name
};

struct device *xpc_part = &xpc_part_dbg_subname;
struct device *xpc_chan = &xpc_chan_dbg_subname;

static int xpc_kdebug_ignore;

/* systune related variables for /proc/sys directories */

static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
static int xpc_hb_min_interval = 1;
static int xpc_hb_max_interval = 10;

static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
static int xpc_hb_check_min_interval = 10;
static int xpc_hb_check_max_interval = 120;

int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT;
static int xpc_disengage_min_timelimit;	/* = 0 */
static int xpc_disengage_max_timelimit = 120;

static struct ctl_table xpc_sys_xpc_hb_dir[] = {
	{
	 .procname = "hb_interval",
	 .data = &xpc_hb_interval,
	 .maxlen = sizeof(int),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = &xpc_hb_min_interval,
	 .extra2 = &xpc_hb_max_interval},
	{
	 .procname = "hb_check_interval",
	 .data = &xpc_hb_check_interval,
	 .maxlen = sizeof(int),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = &xpc_hb_check_min_interval,
	 .extra2 = &xpc_hb_check_max_interval},
	{}
};
static struct ctl_table xpc_sys_xpc_dir[] = {
	{
	 .procname = "hb",
	 .mode = 0555,
	 .child = xpc_sys_xpc_hb_dir},
	{
	 .procname = "disengage_timelimit",
	 .data = &xpc_disengage_timelimit,
	 .maxlen = sizeof(int),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = &xpc_disengage_min_timelimit,
	 .extra2 = &xpc_disengage_max_timelimit},
	{}
};
static struct ctl_table xpc_sys_dir[] = {
	{
	 .procname = "xpc",
	 .mode = 0555,
	 .child = xpc_sys_xpc_dir},
	{}
};
static struct ctl_table_header *xpc_sysctl;
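
/*
 * Note (assuming the usual registration of xpc_sys_dir at module load, whose
 * header is stashed in xpc_sysctl and unregistered in xpc_do_exit()): the
 * tunables above appear as /proc/sys/xpc/hb/hb_interval,
 * /proc/sys/xpc/hb/hb_check_interval and /proc/sys/xpc/disengage_timelimit,
 * each range-checked by proc_dointvec_minmax() against its extra1/extra2
 * bounds.
 */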

/* non-zero if any remote partition disengage was timed out */
int xpc_disengage_timedout;

/* #of activate IRQs received and not yet processed */
int xpc_activate_IRQ_rcvd;
DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock);

/* IRQ handler notifies this wait queue on receipt of an IRQ */
DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq);

static unsigned long xpc_hb_check_timeout;
static struct timer_list xpc_hb_timer;

/* notification that the xpc_hb_checker thread has exited */
static DECLARE_COMPLETION(xpc_hb_checker_exited);

/* notification that the xpc_discovery thread has exited */
static DECLARE_COMPLETION(xpc_discovery_exited);

static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);

static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
static struct notifier_block xpc_reboot_notifier = {
	.notifier_call = xpc_system_reboot,
};

static int xpc_system_die(struct notifier_block *, unsigned long, void *);
static struct notifier_block xpc_die_notifier = {
	.notifier_call = xpc_system_die,
};

struct xpc_arch_operations xpc_arch_ops;

/*
 * Timer function to enforce the timelimit on the partition disengage.
 */
static void
xpc_timeout_partition_disengage(struct timer_list *t)
{
	struct xpc_partition *part = from_timer(part, t, disengage_timer);

	DBUG_ON(time_is_after_jiffies(part->disengage_timeout));

	(void)xpc_partition_disengaged(part);

	DBUG_ON(part->disengage_timeout != 0);
	DBUG_ON(xpc_arch_ops.partition_engaged(XPC_PARTID(part)));
}

/*
 * Timer to produce the heartbeat.  The timer structure's function is
 * already set when this is initially called.  A tunable is used to
 * specify when the next timeout should occur.
 */
static void
xpc_hb_beater(struct timer_list *unused)
{
	xpc_arch_ops.increment_heartbeat();

	if (time_is_before_eq_jiffies(xpc_hb_check_timeout))
		wake_up_interruptible(&xpc_activate_IRQ_wq);

	xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
	add_timer(&xpc_hb_timer);
}

static void
xpc_start_hb_beater(void)
{
	xpc_arch_ops.heartbeat_init();
	timer_setup(&xpc_hb_timer, xpc_hb_beater, 0);
	xpc_hb_beater(0);
}

static void
xpc_stop_hb_beater(void)
{
	del_timer_sync(&xpc_hb_timer);
	xpc_arch_ops.heartbeat_exit();
}

/*
 * At periodic intervals, scan through all active partitions and ensure
 * their heartbeat is still active.  If not, the partition is deactivated.
 */
static void
xpc_check_remote_hb(void)
{
	struct xpc_partition *part;
	short partid;
	enum xp_retval ret;

	for (partid = 0; partid < xp_max_npartitions; partid++) {

		if (xpc_exiting)
			break;

		if (partid == xp_partition_id)
			continue;

		part = &xpc_partitions[partid];

		if (part->act_state == XPC_P_AS_INACTIVE ||
		    part->act_state == XPC_P_AS_DEACTIVATING) {
			continue;
		}

		ret = xpc_arch_ops.get_remote_heartbeat(part);
		if (ret != xpSuccess)
			XPC_DEACTIVATE_PARTITION(part, ret);
	}
}

/*
 * This thread is responsible for nearly all of the partition
 * activation/deactivation.
 */
static int
xpc_hb_checker(void *ignore)
{
	int force_IRQ = 0;

	/* this thread was marked active by xpc_hb_init() */

	set_cpus_allowed_ptr(current, cpumask_of(XPC_HB_CHECK_CPU));

	/* set our heartbeating to other partitions into motion */
	xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
	xpc_start_hb_beater();

	while (!xpc_exiting) {

		dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
			"been received\n",
			(int)(xpc_hb_check_timeout - jiffies),
			xpc_activate_IRQ_rcvd);

		/* checking of remote heartbeats is skewed by IRQ handling */
		if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) {
			xpc_hb_check_timeout = jiffies +
			    (xpc_hb_check_interval * HZ);

			dev_dbg(xpc_part, "checking remote heartbeats\n");
			xpc_check_remote_hb();
		}

		/* check for outstanding IRQs */
		if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) {
			force_IRQ = 0;
			dev_dbg(xpc_part, "processing activate IRQs "
				"received\n");
			xpc_arch_ops.process_activate_IRQ_rcvd();
		}

		/* wait for IRQ or timeout */
		(void)wait_event_interruptible(xpc_activate_IRQ_wq,
					       (time_is_before_eq_jiffies(
						xpc_hb_check_timeout) ||
						xpc_activate_IRQ_rcvd > 0 ||
						xpc_exiting));
	}

	xpc_stop_hb_beater();

	dev_dbg(xpc_part, "heartbeat checker is exiting\n");

	/* mark this thread as having exited */
	complete(&xpc_hb_checker_exited);
	return 0;
}

/*
 * This thread will attempt to discover other partitions to activate
 * based on info provided by SAL. This new thread is short-lived and
 * will exit once discovery is complete.
 */
static int
xpc_initiate_discovery(void *ignore)
{
	xpc_discovery();

	dev_dbg(xpc_part, "discovery thread is exiting\n");

	/* mark this thread as having exited */
	complete(&xpc_discovery_exited);
	return 0;
}

/*
 * The first kthread assigned to a newly activated partition is the one
 * created by XPC HB with which it calls xpc_activating(). XPC hangs on to
 * that kthread until the partition is brought down, at which time that kthread
 * returns back to XPC HB. (The return of that kthread will signify to XPC HB
 * that XPC has dismantled all communication infrastructure for the associated
 * partition.) This kthread becomes the channel manager for that partition.
 *
 * Each active partition has a channel manager, which, besides connecting and
 * disconnecting channels, will ensure that each of the partition's connected
 * channels has the required number of assigned kthreads to get the work done.
 */
static void
xpc_channel_mgr(struct xpc_partition *part)
{
	while (part->act_state != XPC_P_AS_DEACTIVATING ||
	       atomic_read(&part->nchannels_active) > 0 ||
	       !xpc_partition_disengaged(part)) {

		xpc_process_sent_chctl_flags(part);

		/*
		 * Wait until we've been requested to activate kthreads or
		 * all of the channel's message queues have been torn down or
		 * a signal is pending.
		 *
		 * The channel_mgr_requests count is set to 1 after being
		 * awakened. This is done to prevent the channel mgr from
		 * making one pass through the loop for each request, since
		 * it will be servicing all the requests in one pass. The
		 * reason it's set to 1 instead of 0 is so that other kthreads
		 * will know that the channel mgr is running and won't bother
		 * trying to wake it up.
		 */
		atomic_dec(&part->channel_mgr_requests);
		(void)wait_event_interruptible(part->channel_mgr_wq,
				(atomic_read(&part->channel_mgr_requests) > 0 ||
				 part->chctl.all_flags != 0 ||
				 (part->act_state == XPC_P_AS_DEACTIVATING &&
				 atomic_read(&part->nchannels_active) == 0 &&
				 xpc_partition_disengaged(part))));
		atomic_set(&part->channel_mgr_requests, 1);
	}
}
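
/*
 * To illustrate the channel_mgr_requests protocol above (a sketch; the
 * waker, xpc_wakeup_channel_mgr(), is assumed to live in xpc.h):
 *
 *	running manager:  count == 1
 *	manager sleeps:   atomic_dec() -> count == 0
 *	each waker:       atomic_inc_return(), and only the 0 -> 1
 *			  transition needs to wake part->channel_mgr_wq
 *	manager wakes:    services everything, atomic_set(count, 1)
 *
 * so multiple requests that arrive while the manager is busy collapse into
 * a single additional pass through the loop.
 */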

/*
 * Guarantee that the kzalloc'd memory is cacheline aligned.
 */
void *
xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
{
	/* see if kzalloc will give us cacheline aligned memory by default */
	*base = kzalloc(size, flags);
	if (*base == NULL)
		return NULL;

	if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
		return *base;

	kfree(*base);

	/* nope, we'll have to do it ourselves */
	*base = kzalloc(size + L1_CACHE_BYTES, flags);
	if (*base == NULL)
		return NULL;

	return (void *)L1_CACHE_ALIGN((u64)*base);
}
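
/*
 * Callers must keep the cookie returned through @base and pass *that* to
 * kfree(), since the pointer returned above may have been bumped past the
 * start of the allocation; see how remote_openclose_args and
 * remote_openclose_args_base are paired in xpc_setup_ch_structures() and
 * xpc_teardown_ch_structures() below.
 */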

/*
 * Set up the channel structures necessary to support XPartition Communication
 * between the specified remote partition and the local one.
 */
static enum xp_retval
xpc_setup_ch_structures(struct xpc_partition *part)
{
	enum xp_retval ret;
	int ch_number;
	struct xpc_channel *ch;
	short partid = XPC_PARTID(part);

	/*
	 * Allocate all of the channel structures as a contiguous chunk of
	 * memory.
	 */
	DBUG_ON(part->channels != NULL);
	part->channels = kcalloc(XPC_MAX_NCHANNELS,
				 sizeof(struct xpc_channel),
				 GFP_KERNEL);
	if (part->channels == NULL) {
		dev_err(xpc_chan, "can't get memory for channels\n");
		return xpNoMemory;
	}

	/* allocate the remote open and close args */

	part->remote_openclose_args =
	    xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
					  GFP_KERNEL, &part->
					  remote_openclose_args_base);
	if (part->remote_openclose_args == NULL) {
		dev_err(xpc_chan, "can't get memory for remote connect args\n");
		ret = xpNoMemory;
		goto out_1;
	}

	part->chctl.all_flags = 0;
	spin_lock_init(&part->chctl_lock);

	atomic_set(&part->channel_mgr_requests, 1);
	init_waitqueue_head(&part->channel_mgr_wq);

	part->nchannels = XPC_MAX_NCHANNELS;

	atomic_set(&part->nchannels_active, 0);
	atomic_set(&part->nchannels_engaged, 0);

	for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
		ch = &part->channels[ch_number];

		ch->partid = partid;
		ch->number = ch_number;
		ch->flags = XPC_C_DISCONNECTED;

		atomic_set(&ch->kthreads_assigned, 0);
		atomic_set(&ch->kthreads_idle, 0);
		atomic_set(&ch->kthreads_active, 0);

		atomic_set(&ch->references, 0);
		atomic_set(&ch->n_to_notify, 0);

		spin_lock_init(&ch->lock);
		init_completion(&ch->wdisconnect_wait);

		atomic_set(&ch->n_on_msg_allocate_wq, 0);
		init_waitqueue_head(&ch->msg_allocate_wq);
		init_waitqueue_head(&ch->idle_wq);
	}

	ret = xpc_arch_ops.setup_ch_structures(part);
	if (ret != xpSuccess)
		goto out_2;

	/*
	 * With the setting of the partition setup_state to XPC_P_SS_SETUP,
	 * we're declaring that this partition is ready to go.
	 */
	part->setup_state = XPC_P_SS_SETUP;

	return xpSuccess;

	/* setup of ch structures failed */
out_2:
	kfree(part->remote_openclose_args_base);
	part->remote_openclose_args = NULL;
out_1:
	kfree(part->channels);
	part->channels = NULL;
	return ret;
}

/*
 * Tear down the channel structures necessary to support XPartition
 * Communication between the specified remote partition and the local one.
 */
static void
xpc_teardown_ch_structures(struct xpc_partition *part)
{
	DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
	DBUG_ON(atomic_read(&part->nchannels_active) != 0);

	/*
	 * Make this partition inaccessible to local processes by marking it
	 * as no longer setup. Then wait before proceeding with the teardown
	 * until all existing references cease.
	 */
	DBUG_ON(part->setup_state != XPC_P_SS_SETUP);
	part->setup_state = XPC_P_SS_WTEARDOWN;

	wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));

	/* now we can begin tearing down the infrastructure */

	xpc_arch_ops.teardown_ch_structures(part);

	kfree(part->remote_openclose_args_base);
	part->remote_openclose_args = NULL;
	kfree(part->channels);
	part->channels = NULL;

	part->setup_state = XPC_P_SS_TORNDOWN;
}

/*
 * When XPC HB determines that a partition has come up, it will create a new
 * kthread and that kthread will call this function to attempt to set up the
 * basic infrastructure used for Cross Partition Communication with the newly
 * upped partition.
 *
 * The kthread that was created by XPC HB and which set up the XPC
 * infrastructure will remain assigned to the partition, becoming the channel
 * manager for that partition until the partition is deactivating, at which
 * time the kthread will tear down the XPC infrastructure and then exit.
 */
static int
xpc_activating(void *__partid)
{
	short partid = (u64)__partid;
	struct xpc_partition *part = &xpc_partitions[partid];
	unsigned long irq_flags;

	DBUG_ON(partid < 0 || partid >= xp_max_npartitions);

	spin_lock_irqsave(&part->act_lock, irq_flags);

	if (part->act_state == XPC_P_AS_DEACTIVATING) {
		part->act_state = XPC_P_AS_INACTIVE;
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
		part->remote_rp_pa = 0;
		return 0;
	}

	/* indicate the thread is activating */
	DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ);
	part->act_state = XPC_P_AS_ACTIVATING;

	XPC_SET_REASON(part, 0, 0);
	spin_unlock_irqrestore(&part->act_lock, irq_flags);

	dev_dbg(xpc_part, "activating partition %d\n", partid);

	xpc_arch_ops.allow_hb(partid);

	if (xpc_setup_ch_structures(part) == xpSuccess) {
		(void)xpc_part_ref(part);	/* this will always succeed */

		if (xpc_arch_ops.make_first_contact(part) == xpSuccess) {
			xpc_mark_partition_active(part);
			xpc_channel_mgr(part);
			/* won't return until partition is deactivating */
		}

		xpc_part_deref(part);
		xpc_teardown_ch_structures(part);
	}

	xpc_arch_ops.disallow_hb(partid);
	xpc_mark_partition_inactive(part);

	if (part->reason == xpReactivating) {
		/* interrupting ourselves results in activating partition */
		xpc_arch_ops.request_partition_reactivation(part);
	}

	return 0;
}

void
xpc_activate_partition(struct xpc_partition *part)
{
	short partid = XPC_PARTID(part);
	unsigned long irq_flags;
	struct task_struct *kthread;

	spin_lock_irqsave(&part->act_lock, irq_flags);

	DBUG_ON(part->act_state != XPC_P_AS_INACTIVE);

	part->act_state = XPC_P_AS_ACTIVATION_REQ;
	XPC_SET_REASON(part, xpCloneKThread, __LINE__);

	spin_unlock_irqrestore(&part->act_lock, irq_flags);

	kthread = kthread_run(xpc_activating, (void *)((u64)partid), "xpc%02d",
			      partid);
	if (IS_ERR(kthread)) {
		spin_lock_irqsave(&part->act_lock, irq_flags);
		part->act_state = XPC_P_AS_INACTIVE;
		XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__);
		spin_unlock_irqrestore(&part->act_lock, irq_flags);
	}
}

void
xpc_activate_kthreads(struct xpc_channel *ch, int needed)
{
	int idle = atomic_read(&ch->kthreads_idle);
	int assigned = atomic_read(&ch->kthreads_assigned);
	int wakeup;

	DBUG_ON(needed <= 0);

	if (idle > 0) {
		wakeup = (needed > idle) ? idle : needed;
		needed -= wakeup;

		dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, "
			"channel=%d\n", wakeup, ch->partid, ch->number);

		/* only wakeup the requested number of kthreads */
		wake_up_nr(&ch->idle_wq, wakeup);
	}

	if (needed <= 0)
		return;

	if (needed + assigned > ch->kthreads_assigned_limit) {
		needed = ch->kthreads_assigned_limit - assigned;
		if (needed <= 0)
			return;
	}

	dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
		needed, ch->partid, ch->number);

	xpc_create_kthreads(ch, needed, 0);
}
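
/*
 * Worked example of the clamping above (hypothetical numbers): with
 * needed = 5, kthreads_idle = 2, kthreads_assigned = 6 and
 * kthreads_assigned_limit = 8, two idle kthreads are woken, leaving
 * needed = 3; since 3 + 6 exceeds the limit of 8, only 8 - 6 = 2 new
 * kthreads are actually created.
 */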

/*
 * This function is where XPC's kthreads wait for messages to deliver.
 */
static void
xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
{
	int (*n_of_deliverable_payloads) (struct xpc_channel *) =
		xpc_arch_ops.n_of_deliverable_payloads;

	do {
		/* deliver messages to their intended recipients */

		while (n_of_deliverable_payloads(ch) > 0 &&
		       !(ch->flags & XPC_C_DISCONNECTING)) {
			xpc_deliver_payload(ch);
		}

		if (atomic_inc_return(&ch->kthreads_idle) >
		    ch->kthreads_idle_limit) {
			/* too many idle kthreads on this channel */
			atomic_dec(&ch->kthreads_idle);
			break;
		}

		dev_dbg(xpc_chan, "idle kthread calling "
			"wait_event_interruptible_exclusive()\n");

		(void)wait_event_interruptible_exclusive(ch->idle_wq,
				(n_of_deliverable_payloads(ch) > 0 ||
				 (ch->flags & XPC_C_DISCONNECTING)));

		atomic_dec(&ch->kthreads_idle);

	} while (!(ch->flags & XPC_C_DISCONNECTING));
}

static int
xpc_kthread_start(void *args)
{
	short partid = XPC_UNPACK_ARG1(args);
	u16 ch_number = XPC_UNPACK_ARG2(args);
	struct xpc_partition *part = &xpc_partitions[partid];
	struct xpc_channel *ch;
	int n_needed;
	unsigned long irq_flags;
	int (*n_of_deliverable_payloads) (struct xpc_channel *) =
		xpc_arch_ops.n_of_deliverable_payloads;

	dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n",
		partid, ch_number);

	ch = &part->channels[ch_number];

	if (!(ch->flags & XPC_C_DISCONNECTING)) {

		/* let registerer know that connection has been established */

		spin_lock_irqsave(&ch->lock, irq_flags);
		if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) {
			ch->flags |= XPC_C_CONNECTEDCALLOUT;
			spin_unlock_irqrestore(&ch->lock, irq_flags);

			xpc_connected_callout(ch);

			spin_lock_irqsave(&ch->lock, irq_flags);
			ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE;
			spin_unlock_irqrestore(&ch->lock, irq_flags);

			/*
			 * It is possible that while the callout was being
			 * made the remote partition sent some messages.
			 * If that is the case, we may need to activate
			 * additional kthreads to help deliver them. We only
			 * need one less than total #of messages to deliver.
			 */
			n_needed = n_of_deliverable_payloads(ch) - 1;
			if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING))
				xpc_activate_kthreads(ch, n_needed);

		} else {
			spin_unlock_irqrestore(&ch->lock, irq_flags);
		}

		xpc_kthread_waitmsgs(part, ch);
	}

	/* let registerer know that connection is disconnecting */

	spin_lock_irqsave(&ch->lock, irq_flags);
	if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
	    !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
		ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
		spin_unlock_irqrestore(&ch->lock, irq_flags);

		xpc_disconnect_callout(ch, xpDisconnecting);

		spin_lock_irqsave(&ch->lock, irq_flags);
		ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
	}
	spin_unlock_irqrestore(&ch->lock, irq_flags);

	if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
	    atomic_dec_return(&part->nchannels_engaged) == 0) {
		xpc_arch_ops.indicate_partition_disengaged(part);
	}

	xpc_msgqueue_deref(ch);

	dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n",
		partid, ch_number);

	xpc_part_deref(part);
	return 0;
}

/*
 * For each partition that XPC has established communications with, there is
 * a minimum of one kernel thread assigned to perform any operation that
 * may potentially sleep or block (basically the callouts to the asynchronous
 * functions registered via xpc_connect()).
 *
 * Additional kthreads are created and destroyed by XPC as the workload
 * demands.
 *
 * A kthread is assigned to one of the active channels that exists for a given
 * partition.
 */
void
xpc_create_kthreads(struct xpc_channel *ch, int needed,
		    int ignore_disconnecting)
{
	unsigned long irq_flags;
	u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
	struct xpc_partition *part = &xpc_partitions[ch->partid];
	struct task_struct *kthread;
	void (*indicate_partition_disengaged) (struct xpc_partition *) =
		xpc_arch_ops.indicate_partition_disengaged;

	while (needed-- > 0) {

		/*
		 * The following is done on behalf of the newly created
		 * kthread. That kthread is responsible for doing the
		 * counterpart to the following before it exits.
		 */
		if (ignore_disconnecting) {
			if (!atomic_inc_not_zero(&ch->kthreads_assigned)) {
				/* kthreads assigned had gone to zero */
				BUG_ON(!(ch->flags &
					 XPC_C_DISCONNECTINGCALLOUT_MADE));
				break;
			}

		} else if (ch->flags & XPC_C_DISCONNECTING) {
			break;

		} else if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
			   atomic_inc_return(&part->nchannels_engaged) == 1) {
			xpc_arch_ops.indicate_partition_engaged(part);
		}
		(void)xpc_part_ref(part);
		xpc_msgqueue_ref(ch);

		kthread = kthread_run(xpc_kthread_start, (void *)args,
				      "xpc%02dc%d", ch->partid, ch->number);
		if (IS_ERR(kthread)) {
			/* the fork failed */

			/*
			 * NOTE: if (ignore_disconnecting &&
			 * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true,
			 * then we'll deadlock if all other kthreads assigned
			 * to this channel are blocked in the channel's
			 * registerer, because the only thing that will unblock
			 * them is the xpDisconnecting callout that this
			 * failed kthread_run() would have made.
			 */

			if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
			    atomic_dec_return(&part->nchannels_engaged) == 0) {
				indicate_partition_disengaged(part);
			}
			xpc_msgqueue_deref(ch);
			xpc_part_deref(part);

			if (atomic_read(&ch->kthreads_assigned) <
			    ch->kthreads_idle_limit) {
				/*
				 * Flag this as an error only if we have an
				 * insufficient #of kthreads for the channel
				 * to function.
				 */
				spin_lock_irqsave(&ch->lock, irq_flags);
				XPC_DISCONNECT_CHANNEL(ch, xpLackOfResources,
						       &irq_flags);
				spin_unlock_irqrestore(&ch->lock, irq_flags);
			}
			break;
		}
	}
}

void
xpc_disconnect_wait(int ch_number)
{
	unsigned long irq_flags;
	short partid;
	struct xpc_partition *part;
	struct xpc_channel *ch;
	int wakeup_channel_mgr;

	/* now wait for all callouts to the caller's function to cease */
	for (partid = 0; partid < xp_max_npartitions; partid++) {
		part = &xpc_partitions[partid];

		if (!xpc_part_ref(part))
			continue;

		ch = &part->channels[ch_number];

		if (!(ch->flags & XPC_C_WDISCONNECT)) {
			xpc_part_deref(part);
			continue;
		}

		wait_for_completion(&ch->wdisconnect_wait);

		spin_lock_irqsave(&ch->lock, irq_flags);
		DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
		wakeup_channel_mgr = 0;

		if (ch->delayed_chctl_flags) {
			if (part->act_state != XPC_P_AS_DEACTIVATING) {
				spin_lock(&part->chctl_lock);
				part->chctl.flags[ch->number] |=
				    ch->delayed_chctl_flags;
				spin_unlock(&part->chctl_lock);
				wakeup_channel_mgr = 1;
			}
			ch->delayed_chctl_flags = 0;
		}

		ch->flags &= ~XPC_C_WDISCONNECT;
		spin_unlock_irqrestore(&ch->lock, irq_flags);

		if (wakeup_channel_mgr)
			xpc_wakeup_channel_mgr(part);

		xpc_part_deref(part);
	}
}

static int
xpc_setup_partitions(void)
{
	short partid;
	struct xpc_partition *part;

	xpc_partitions = kcalloc(xp_max_npartitions,
				 sizeof(struct xpc_partition),
				 GFP_KERNEL);
	if (xpc_partitions == NULL) {
		dev_err(xpc_part, "can't get memory for partition structure\n");
		return -ENOMEM;
	}

	/*
	 * The first few fields of each entry of xpc_partitions[] need to
	 * be initialized now so that calls to xpc_connect() and
	 * xpc_disconnect() can be made prior to the activation of any remote
	 * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
	 * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
	 * PARTITION HAS BEEN ACTIVATED.
	 */
	for (partid = 0; partid < xp_max_npartitions; partid++) {
		part = &xpc_partitions[partid];

		DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));

		part->activate_IRQ_rcvd = 0;
		spin_lock_init(&part->act_lock);
		part->act_state = XPC_P_AS_INACTIVE;
		XPC_SET_REASON(part, 0, 0);

		timer_setup(&part->disengage_timer,
			    xpc_timeout_partition_disengage, 0);

		part->setup_state = XPC_P_SS_UNSET;
		init_waitqueue_head(&part->teardown_wq);
		atomic_set(&part->references, 0);
	}

	return xpc_arch_ops.setup_partitions();
}

static void
xpc_teardown_partitions(void)
{
	xpc_arch_ops.teardown_partitions();
	kfree(xpc_partitions);
}

static void
xpc_do_exit(enum xp_retval reason)
{
	short partid;
	int active_part_count, printed_waiting_msg = 0;
	struct xpc_partition *part;
	unsigned long printmsg_time, disengage_timeout = 0;

	/* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
	DBUG_ON(xpc_exiting == 1);

	/*
	 * Let the heartbeat checker thread and the discovery thread
	 * (if one is running) know that they should exit. Also wake up
	 * the heartbeat checker thread in case it's sleeping.
	 */
	xpc_exiting = 1;
	wake_up_interruptible(&xpc_activate_IRQ_wq);

	/* wait for the discovery thread to exit */
	wait_for_completion(&xpc_discovery_exited);

	/* wait for the heartbeat checker thread to exit */
	wait_for_completion(&xpc_hb_checker_exited);

	/* sleep for 1/3 of a second or so */
	(void)msleep_interruptible(300);

	/* wait for all partitions to become inactive */

	printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
	xpc_disengage_timedout = 0;

	do {
		active_part_count = 0;

		for (partid = 0; partid < xp_max_npartitions; partid++) {
			part = &xpc_partitions[partid];

			if (xpc_partition_disengaged(part) &&
			    part->act_state == XPC_P_AS_INACTIVE) {
				continue;
			}

			active_part_count++;

			XPC_DEACTIVATE_PARTITION(part, reason);

			if (part->disengage_timeout > disengage_timeout)
				disengage_timeout = part->disengage_timeout;
		}

		if (xpc_arch_ops.any_partition_engaged()) {
			if (time_is_before_jiffies(printmsg_time)) {
				dev_info(xpc_part, "waiting for remote "
					 "partitions to deactivate, timeout in "
					 "%ld seconds\n", (disengage_timeout -
					 jiffies) / HZ);
				printmsg_time = jiffies +
				    (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
				printed_waiting_msg = 1;
			}

		} else if (active_part_count > 0) {
			if (printed_waiting_msg) {
				dev_info(xpc_part, "waiting for local partition"
					 " to deactivate\n");
				printed_waiting_msg = 0;
			}

		} else {
			if (!xpc_disengage_timedout) {
				dev_info(xpc_part, "all partitions have "
					 "deactivated\n");
			}
			break;
		}

		/* sleep for 1/3 of a second or so */
		(void)msleep_interruptible(300);

	} while (1);

	DBUG_ON(xpc_arch_ops.any_partition_engaged());

	xpc_teardown_rsvd_page();

	if (reason == xpUnloading) {
		(void)unregister_die_notifier(&xpc_die_notifier);
		(void)unregister_reboot_notifier(&xpc_reboot_notifier);
	}

	/* clear the interface to XPC's functions */
	xpc_clear_interface();

	if (xpc_sysctl)
		unregister_sysctl_table(xpc_sysctl);

	xpc_teardown_partitions();

	if (is_uv_system())
		xpc_exit_uv();
}

/*
 * This function is called when the system is being rebooted.
 */
static int
xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
{
	enum xp_retval reason;

	switch (event) {
	case SYS_RESTART:
		reason = xpSystemReboot;
		break;
	case SYS_HALT:
		reason = xpSystemHalt;
		break;
	case SYS_POWER_OFF:
		reason = xpSystemPoweroff;
		break;
	default:
		reason = xpSystemGoingDown;
	}

	xpc_do_exit(reason);
	return NOTIFY_DONE;
}

/* Used to only allow one cpu to complete disconnect */
static unsigned int xpc_die_disconnecting;

/*
 * Notify other partitions to deactivate from us by first disengaging from all
 * references to our memory.
 */
static void
xpc_die_deactivate(void)
{
	struct xpc_partition *part;
	short partid;
	int any_engaged;
	long keep_waiting;
	long wait_to_print;

	if (cmpxchg(&xpc_die_disconnecting, 0, 1))
		return;

	/* keep xpc_hb_checker thread from doing anything (just in case) */
	xpc_exiting = 1;

	xpc_arch_ops.disallow_all_hbs();	/* indicate we're deactivated */

	for (partid = 0; partid < xp_max_npartitions; partid++) {
		part = &xpc_partitions[partid];

		if (xpc_arch_ops.partition_engaged(partid) ||
		    part->act_state != XPC_P_AS_INACTIVE) {
			xpc_arch_ops.request_partition_deactivation(part);
			xpc_arch_ops.indicate_partition_disengaged(part);
		}
	}

	/*
	 * Though we requested that all other partitions deactivate from us,
	 * we only wait until they've all disengaged or we've reached the
	 * defined timelimit.
	 *
	 * Given that one iteration through the following while-loop takes
	 * approximately 200 microseconds, calculate the #of loops to take
	 * before bailing and the #of loops before printing a waiting message.
	 */
	keep_waiting = xpc_disengage_timelimit * 1000 * 5;
	wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5;
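	/*
	 * To spell out the arithmetic: each iteration costs roughly 200 us
	 * (the udelay() below), so one second is about 1,000,000 / 200 =
	 * 5000 = 1000 * 5 iterations; multiplying the two timeouts (in
	 * seconds) by 1000 * 5 converts them into loop counts.
	 */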

	while (1) {
		any_engaged = xpc_arch_ops.any_partition_engaged();
		if (!any_engaged) {
			dev_info(xpc_part, "all partitions have deactivated\n");
			break;
		}

		if (!keep_waiting--) {
			for (partid = 0; partid < xp_max_npartitions;
			     partid++) {
				if (xpc_arch_ops.partition_engaged(partid)) {
					dev_info(xpc_part, "deactivate from "
						 "remote partition %d timed "
						 "out\n", partid);
				}
			}
			break;
		}

		if (!wait_to_print--) {
			dev_info(xpc_part, "waiting for remote partitions to "
				 "deactivate, timeout in %ld seconds\n",
				 keep_waiting / (1000 * 5));
			wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL *
			    1000 * 5;
		}

		udelay(200);
	}
}

/*
 * This function is called when the system is being restarted or halted due
 * to some sort of system failure. If this is the case we need to notify the
 * other partitions to disengage from all references to our memory.
 * This function can also be called when our heartbeat may be offline for a
 * time. In this case we need to notify other partitions not to worry about
 * the lack of a heartbeat.
 */
static int
xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args)
{
#ifdef CONFIG_IA64		/* !!! temporary kludge */
	switch (event) {
	case DIE_MACHINE_RESTART:
	case DIE_MACHINE_HALT:
		xpc_die_deactivate();
		break;

	case DIE_KDEBUG_ENTER:
		/* Should lack of heartbeat be ignored by other partitions? */
		if (!xpc_kdebug_ignore)
			break;

		fallthrough;
	case DIE_MCA_MONARCH_ENTER:
	case DIE_INIT_MONARCH_ENTER:
		xpc_arch_ops.offline_heartbeat();
		break;

	case DIE_KDEBUG_LEAVE:
		/* Is lack of heartbeat being ignored by other partitions? */
		if (!xpc_kdebug_ignore)
			break;

		fallthrough;
	case DIE_MCA_MONARCH_LEAVE:
	case DIE_INIT_MONARCH_LEAVE:
		xpc_arch_ops.online_heartbeat();
		break;
	}
#else
	struct die_args *die_args = _die_args;

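	/*
	 * On non-IA64 (in practice x86_64) we deactivate on traps that are
	 * fatal to the kernel: a double fault, or an x87/SIMD floating-point
	 * exception raised in kernel mode. Debug traps (INT3, DIE_DEBUG) are
	 * ignored; anything else (oops, GPF, ...) is treated as fatal too.
	 */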
	switch (event) {
	case DIE_TRAP:
		if (die_args->trapnr == X86_TRAP_DF)
			xpc_die_deactivate();

		if (((die_args->trapnr == X86_TRAP_MF) ||
		     (die_args->trapnr == X86_TRAP_XF)) &&
		    !user_mode(die_args->regs))
			xpc_die_deactivate();

		break;
	case DIE_INT3:
	case DIE_DEBUG:
		break;
	case DIE_OOPS:
	case DIE_GPF:
	default:
		xpc_die_deactivate();
	}
#endif

	return NOTIFY_DONE;
}

static int __init
xpc_init(void)
{
	int ret;
	struct task_struct *kthread;

	dev_set_name(xpc_part, "part");
	dev_set_name(xpc_chan, "chan");

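	/*
	 * Only UV hardware is supported here; on anything else the module
	 * refuses to load with -ENODEV.
	 */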
	if (is_uv_system()) {
		ret = xpc_init_uv();
	} else {
		ret = -ENODEV;
	}

	if (ret != 0)
		return ret;

	ret = xpc_setup_partitions();
	if (ret != 0) {
		dev_err(xpc_part, "can't get memory for partition structure\n");
		goto out_1;
	}

	xpc_sysctl = register_sysctl_table(xpc_sys_dir);

	/*
	 * Fill the partition reserved page with the information needed by
	 * other partitions to discover we are alive and establish initial
	 * communications.
	 */
	ret = xpc_setup_rsvd_page();
	if (ret != 0) {
		dev_err(xpc_part, "can't setup our reserved page\n");
		goto out_2;
	}

	/* add ourselves to the reboot_notifier_list */
	ret = register_reboot_notifier(&xpc_reboot_notifier);
	if (ret != 0)
		dev_warn(xpc_part, "can't register reboot notifier\n");

	/* add ourselves to the die_notifier list */
	ret = register_die_notifier(&xpc_die_notifier);
	if (ret != 0)
		dev_warn(xpc_part, "can't register die notifier\n");
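	/*
	 * Note that both registrations above are best effort: a failure costs
	 * us the corresponding shutdown/crash hook but is not fatal, so we
	 * warn and carry on.
	 */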

	/*
	 * The real work-horse behind xpc.  This processes incoming
	 * interrupts and monitors remote heartbeats.
	 */
	kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME);
	if (IS_ERR(kthread)) {
		dev_err(xpc_part, "failed while forking hb check thread\n");
		ret = -EBUSY;
		goto out_3;
	}

	/*
	 * Startup a thread that will attempt to discover other partitions to
	 * activate based on info provided by SAL. This new thread is short
	 * lived and will exit once discovery is complete.
	 */
	kthread = kthread_run(xpc_initiate_discovery, NULL,
			      XPC_DISCOVERY_THREAD_NAME);
	if (IS_ERR(kthread)) {
		dev_err(xpc_part, "failed while forking discovery thread\n");

		/* mark this new thread as a non-starter */
		complete(&xpc_discovery_exited);

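		/*
		 * The hb checker thread is already running at this point, so
		 * a simple goto unwind no longer suffices; xpc_do_exit()
		 * stops that thread and tears down everything set up so far.
		 */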
		xpc_do_exit(xpUnloading);
		return -EBUSY;
	}

	/* set the interface to point at XPC's functions */
	xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
			  xpc_initiate_send, xpc_initiate_send_notify,
			  xpc_initiate_received, xpc_initiate_partid_to_nasids);
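	/*
	 * From here on XP's wrappers route to the functions registered above.
	 * A minimal sketch of a hypothetical client send, assuming the
	 * xpc_send() wrapper from xp.h and made-up values for the partition
	 * id, channel number, and payload:
	 *
	 *	enum xp_retval rv;
	 *	char payload[16] = "hello";
	 *
	 *	rv = xpc_send(remote_partid, ch_number, XPC_WAIT,
	 *		      payload, sizeof(payload));
	 *	if (rv != xpSuccess)
	 *		pr_err("xpc_send() failed, reason=%d\n", rv);
	 */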

	return 0;

	/* initialization was not successful */
out_3:
	xpc_teardown_rsvd_page();

	(void)unregister_die_notifier(&xpc_die_notifier);
	(void)unregister_reboot_notifier(&xpc_reboot_notifier);
out_2:
	if (xpc_sysctl)
		unregister_sysctl_table(xpc_sysctl);

	xpc_teardown_partitions();
out_1:
	if (is_uv_system())
		xpc_exit_uv();
	return ret;
}

module_init(xpc_init);

static void __exit
xpc_exit(void)
{
	xpc_do_exit(xpUnloading);
}

module_exit(xpc_exit);

MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION("Cross Partition Communication (XPC) support");
MODULE_LICENSE("GPL");

module_param(xpc_hb_interval, int, 0);
MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between "
		 "heartbeat increments.");

module_param(xpc_hb_check_interval, int, 0);
MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
		 "heartbeat checks.");

module_param(xpc_disengage_timelimit, int, 0);
MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait "
		 "for disengage to complete.");

module_param(xpc_kdebug_ignore, int, 0);
MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
		 "other partitions when dropping into kdebug.");
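
/*
 * Example (hypothetical values): loading the module with a 5 second
 * heartbeat and a 120 second disengage window, assuming the module is
 * built as "xpc":
 *
 *	modprobe xpc xpc_hb_interval=5 xpc_disengage_timelimit=120
 */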