/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * (C) Copyright 2020 Hewlett Packard Enterprise Development LP
 * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved.
 */

/*
 * Cross Partition Communication (XPC) support - standard version.
 *
 *      XPC provides a message passing capability that crosses partition
 *      boundaries. This module is made up of two parts:
 *
 *          partition   This part detects the presence/absence of other
 *                      partitions. It provides a heartbeat and monitors
 *                      the heartbeats of other partitions.
 *
 *          channel     This part manages the channels and sends/receives
 *                      messages across them to/from other partitions.
 *
 *      There are a couple of additional functions residing in XP, which
 *      provide an interface to XPC for its users.
 *
 *
 *      Caveats:
 *
 *        . Currently on sn2, we have no way to determine which nasid an IRQ
 *          came from. Thus, xpc_send_IRQ_sn2() does a remote amo write
 *          followed by an IPI. The amo indicates where data is to be pulled
 *          from, so after the IPI arrives, the remote partition checks the amo
 *          word. The IPI can actually arrive before the amo however, so other
 *          code must periodically check for this case. Also, remote amo
 *          operations do not reliably time out. Thus we do a remote PIO read
 *          solely to know whether the remote partition is down and whether we
 *          should stop sending IPIs to it. This remote PIO read operation is
 *          set up in a special nofault region so SAL knows to ignore (and
 *          cleanup) any errors due to the remote amo write, PIO read, and/or
 *          PIO write operations.
 *
 *          If/when new hardware solves this IPI problem, we should abandon
 *          the current approach.
 *
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/kdebug.h>
#include <linux/kthread.h>
#include "xpc.h"

#ifdef CONFIG_X86_64
#include <asm/traps.h>
#endif

/* define two XPC debug device structures to be used with dev_dbg() et al */

static struct device_driver xpc_dbg_name = {
        .name = "xpc"
};

static struct device xpc_part_dbg_subname = {
        .init_name = "",        /* set to "part" at xpc_init() time */
        .driver = &xpc_dbg_name
};

static struct device xpc_chan_dbg_subname = {
        .init_name = "",        /* set to "chan" at xpc_init() time */
        .driver = &xpc_dbg_name
};

struct device *xpc_part = &xpc_part_dbg_subname;
struct device *xpc_chan = &xpc_chan_dbg_subname;

static int xpc_kdebug_ignore;

/* systune related variables for /proc/sys directories */

static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
static int xpc_hb_min_interval = 1;
static int xpc_hb_max_interval = 10;

static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
static int xpc_hb_check_min_interval = 10;
static int xpc_hb_check_max_interval = 120;

int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT;
static int xpc_disengage_min_timelimit;        /* = 0 */
static int xpc_disengage_max_timelimit = 120;

static struct ctl_table xpc_sys_xpc_hb_dir[] = {
        {
         .procname = "hb_interval",
         .data = &xpc_hb_interval,
         .maxlen = sizeof(int),
         .mode = 0644,
         .proc_handler = proc_dointvec_minmax,
         .extra1 = &xpc_hb_min_interval,
         .extra2 = &xpc_hb_max_interval},
        {
         .procname = "hb_check_interval",
         .data = &xpc_hb_check_interval,
         .maxlen = sizeof(int),
         .mode = 0644,
         .proc_handler = proc_dointvec_minmax,
         .extra1 = &xpc_hb_check_min_interval,
         .extra2 = &xpc_hb_check_max_interval},
        {}
};
static struct ctl_table xpc_sys_xpc_dir[] = {
        {
         .procname = "hb",
         .mode = 0555,
         .child = xpc_sys_xpc_hb_dir},
        {
         .procname = "disengage_timelimit",
         .data = &xpc_disengage_timelimit,
         .maxlen = sizeof(int),
         .mode = 0644,
         .proc_handler = proc_dointvec_minmax,
         .extra1 = &xpc_disengage_min_timelimit,
         .extra2 = &xpc_disengage_max_timelimit},
        {}
};
static struct ctl_table xpc_sys_dir[] = {
        {
         .procname = "xpc",
         .mode = 0555,
         .child = xpc_sys_xpc_dir},
        {}
};
static struct ctl_table_header *xpc_sysctl;

/* non-zero if any remote partition disengage was timed out */
int xpc_disengage_timedout;

/* #of activate IRQs received and not yet processed */
int xpc_activate_IRQ_rcvd;
DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock);

/* IRQ handler notifies this wait queue on receipt of an IRQ */
DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq);

static unsigned long xpc_hb_check_timeout;
static struct timer_list xpc_hb_timer;

/* notification that the xpc_hb_checker thread has exited */
static DECLARE_COMPLETION(xpc_hb_checker_exited);

/* notification that the xpc_discovery thread has exited */
static DECLARE_COMPLETION(xpc_discovery_exited);

static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);

static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
static struct notifier_block xpc_reboot_notifier = {
        .notifier_call = xpc_system_reboot,
};

static int xpc_system_die(struct notifier_block *, unsigned long, void *);
static struct notifier_block xpc_die_notifier = {
        .notifier_call = xpc_system_die,
};

struct xpc_arch_operations xpc_arch_ops;

/*
 * Timer function to enforce the timelimit on the partition disengage.
 */
static void
xpc_timeout_partition_disengage(struct timer_list *t)
{
        struct xpc_partition *part = from_timer(part, t, disengage_timer);

        DBUG_ON(time_is_after_jiffies(part->disengage_timeout));

        (void)xpc_partition_disengaged(part);

        DBUG_ON(part->disengage_timeout != 0);
        DBUG_ON(xpc_arch_ops.partition_engaged(XPC_PARTID(part)));
}

/*
 * Timer to produce the heartbeat. The timer structure's function is
 * already set when this is initially called. A tunable is used to
 * specify when the next timeout should occur.
 */
static void
xpc_hb_beater(struct timer_list *unused)
{
        xpc_arch_ops.increment_heartbeat();

        if (time_is_before_eq_jiffies(xpc_hb_check_timeout))
                wake_up_interruptible(&xpc_activate_IRQ_wq);

        xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
        add_timer(&xpc_hb_timer);
}

static void
xpc_start_hb_beater(void)
{
        xpc_arch_ops.heartbeat_init();
        timer_setup(&xpc_hb_timer, xpc_hb_beater, 0);
        xpc_hb_beater(0);
}

static void
xpc_stop_hb_beater(void)
{
        del_timer_sync(&xpc_hb_timer);
        xpc_arch_ops.heartbeat_exit();
}

/*
 * At periodic intervals, scan through all active partitions and ensure
 * their heartbeat is still active. If not, the partition is deactivated.
 */
static void
xpc_check_remote_hb(void)
{
        struct xpc_partition *part;
        short partid;
        enum xp_retval ret;

        for (partid = 0; partid < xp_max_npartitions; partid++) {

                if (xpc_exiting)
                        break;

                if (partid == xp_partition_id)
                        continue;

                part = &xpc_partitions[partid];

                if (part->act_state == XPC_P_AS_INACTIVE ||
                    part->act_state == XPC_P_AS_DEACTIVATING) {
                        continue;
                }

                ret = xpc_arch_ops.get_remote_heartbeat(part);
                if (ret != xpSuccess)
                        XPC_DEACTIVATE_PARTITION(part, ret);
        }
}

/*
 * This thread is responsible for nearly all of the partition
 * activation/deactivation.
 */
static int
xpc_hb_checker(void *ignore)
{
        int force_IRQ = 0;

        /* this thread was marked active by xpc_hb_init() */

        set_cpus_allowed_ptr(current, cpumask_of(XPC_HB_CHECK_CPU));

        /* set our heartbeating to other partitions into motion */
        xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
        xpc_start_hb_beater();

        while (!xpc_exiting) {

                dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
                        "been received\n",
                        (int)(xpc_hb_check_timeout - jiffies),
                        xpc_activate_IRQ_rcvd);

                /* checking of remote heartbeats is skewed by IRQ handling */
                if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) {
                        xpc_hb_check_timeout = jiffies +
                            (xpc_hb_check_interval * HZ);

                        dev_dbg(xpc_part, "checking remote heartbeats\n");
                        xpc_check_remote_hb();
                }

                /* check for outstanding IRQs */
                if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) {
                        force_IRQ = 0;
                        dev_dbg(xpc_part, "processing activate IRQs "
                                "received\n");
                        xpc_arch_ops.process_activate_IRQ_rcvd();
                }

                /* wait for IRQ or timeout */
                (void)wait_event_interruptible(xpc_activate_IRQ_wq,
                                               (time_is_before_eq_jiffies(
                                                xpc_hb_check_timeout) ||
                                                xpc_activate_IRQ_rcvd > 0 ||
                                                xpc_exiting));
        }

        xpc_stop_hb_beater();

        dev_dbg(xpc_part, "heartbeat checker is exiting\n");

        /* mark this thread as having exited */
        complete(&xpc_hb_checker_exited);
        return 0;
}

/*
 * This thread will attempt to discover other partitions to activate
 * based on info provided by SAL. This new thread is short lived and
 * will exit once discovery is complete.
 */
static int
xpc_initiate_discovery(void *ignore)
{
        xpc_discovery();

        dev_dbg(xpc_part, "discovery thread is exiting\n");

        /* mark this thread as having exited */
        complete(&xpc_discovery_exited);
        return 0;
}

/*
 * The first kthread assigned to a newly activated partition is the one
 * created by XPC HB with which it calls xpc_activating(). XPC hangs on to
 * that kthread until the partition is brought down, at which time that kthread
 * returns back to XPC HB. (The return of that kthread will signify to XPC HB
 * that XPC has dismantled all communication infrastructure for the associated
 * partition.) This kthread becomes the channel manager for that partition.
 *
 * Each active partition has a channel manager, who, besides connecting and
 * disconnecting channels, will ensure that each of the partition's connected
 * channels has the required number of assigned kthreads to get the work done.
 */
static void
xpc_channel_mgr(struct xpc_partition *part)
{
        while (part->act_state != XPC_P_AS_DEACTIVATING ||
               atomic_read(&part->nchannels_active) > 0 ||
               !xpc_partition_disengaged(part)) {

                xpc_process_sent_chctl_flags(part);

                /*
                 * Wait until we've been requested to activate kthreads or
                 * all of the channel's message queues have been torn down or
                 * a signal is pending.
                 *
                 * The channel_mgr_requests is set to 1 after being awakened.
                 * This is done to prevent the channel mgr from making one pass
                 * through the loop for each request, since it will be
                 * servicing all the requests in one pass. The reason it's
                 * set to 1 instead of 0 is so that other kthreads will know
                 * that the channel mgr is running and won't bother trying to
                 * wake it up.
                 */
                atomic_dec(&part->channel_mgr_requests);
                (void)wait_event_interruptible(part->channel_mgr_wq,
                        (atomic_read(&part->channel_mgr_requests) > 0 ||
                         part->chctl.all_flags != 0 ||
                         (part->act_state == XPC_P_AS_DEACTIVATING &&
                          atomic_read(&part->nchannels_active) == 0 &&
                          xpc_partition_disengaged(part))));
                atomic_set(&part->channel_mgr_requests, 1);
        }
}

/*
 * Guarantee that the kzalloc'd memory is cacheline aligned.
 */
void *
xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
{
        /* see if kzalloc will give us cacheline aligned memory by default */
        *base = kzalloc(size, flags);
        if (*base == NULL)
                return NULL;

        if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
                return *base;

        kfree(*base);

        /* nope, we'll have to do it ourselves */
        *base = kzalloc(size + L1_CACHE_BYTES, flags);
        if (*base == NULL)
                return NULL;

        return (void *)L1_CACHE_ALIGN((u64)*base);
}

/*
 * Setup the channel structures necessary to support XPartition Communication
 * between the specified remote partition and the local one.
 */
static enum xp_retval
xpc_setup_ch_structures(struct xpc_partition *part)
{
        enum xp_retval ret;
        int ch_number;
        struct xpc_channel *ch;
        short partid = XPC_PARTID(part);

        /*
         * Allocate all of the channel structures as a contiguous chunk of
         * memory.
         */
        DBUG_ON(part->channels != NULL);
        part->channels = kcalloc(XPC_MAX_NCHANNELS,
                                 sizeof(struct xpc_channel),
                                 GFP_KERNEL);
        if (part->channels == NULL) {
                dev_err(xpc_chan, "can't get memory for channels\n");
                return xpNoMemory;
        }

        /* allocate the remote open and close args */

        part->remote_openclose_args =
            xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
                                          GFP_KERNEL, &part->
                                          remote_openclose_args_base);
        if (part->remote_openclose_args == NULL) {
                dev_err(xpc_chan, "can't get memory for remote connect args\n");
                ret = xpNoMemory;
                goto out_1;
        }

        part->chctl.all_flags = 0;
        spin_lock_init(&part->chctl_lock);

        atomic_set(&part->channel_mgr_requests, 1);
        init_waitqueue_head(&part->channel_mgr_wq);

        part->nchannels = XPC_MAX_NCHANNELS;

        atomic_set(&part->nchannels_active, 0);
        atomic_set(&part->nchannels_engaged, 0);

        for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
                ch = &part->channels[ch_number];

                ch->partid = partid;
                ch->number = ch_number;
                ch->flags = XPC_C_DISCONNECTED;

                atomic_set(&ch->kthreads_assigned, 0);
                atomic_set(&ch->kthreads_idle, 0);
                atomic_set(&ch->kthreads_active, 0);

                atomic_set(&ch->references, 0);
                atomic_set(&ch->n_to_notify, 0);

                spin_lock_init(&ch->lock);
                init_completion(&ch->wdisconnect_wait);

                atomic_set(&ch->n_on_msg_allocate_wq, 0);
                init_waitqueue_head(&ch->msg_allocate_wq);
                init_waitqueue_head(&ch->idle_wq);
        }

        ret = xpc_arch_ops.setup_ch_structures(part);
        if (ret != xpSuccess)
                goto out_2;

        /*
         * With the setting of the partition setup_state to XPC_P_SS_SETUP,
         * we're declaring that this partition is ready to go.
         */
        part->setup_state = XPC_P_SS_SETUP;

        return xpSuccess;

        /* setup of ch structures failed */
out_2:
        kfree(part->remote_openclose_args_base);
        part->remote_openclose_args = NULL;
out_1:
        kfree(part->channels);
        part->channels = NULL;
        return ret;
}

/*
 * Tear down the channel structures necessary to support XPartition
 * Communication between the specified remote partition and the local one.
 */
static void
xpc_teardown_ch_structures(struct xpc_partition *part)
{
        DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
        DBUG_ON(atomic_read(&part->nchannels_active) != 0);

        /*
         * Make this partition inaccessible to local processes by marking it
         * as no longer setup. Then wait before proceeding with the teardown
         * until all existing references cease.
         */
        DBUG_ON(part->setup_state != XPC_P_SS_SETUP);
        part->setup_state = XPC_P_SS_WTEARDOWN;

        wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));

        /* now we can begin tearing down the infrastructure */

        xpc_arch_ops.teardown_ch_structures(part);

        kfree(part->remote_openclose_args_base);
        part->remote_openclose_args = NULL;
        kfree(part->channels);
        part->channels = NULL;

        part->setup_state = XPC_P_SS_TORNDOWN;
}

/*
 * When XPC HB determines that a partition has come up, it will create a new
 * kthread and that kthread will call this function to attempt to set up the
 * basic infrastructure used for Cross Partition Communication with the newly
 * upped partition.
 *
 * The kthread that was created by XPC HB and which set up the XPC
 * infrastructure will remain assigned to the partition, becoming the channel
 * manager for that partition until the partition is deactivating, at which
 * time the kthread will tear down the XPC infrastructure and then exit.
 */
static int
xpc_activating(void *__partid)
{
        short partid = (u64)__partid;
        struct xpc_partition *part = &xpc_partitions[partid];
        unsigned long irq_flags;

        DBUG_ON(partid < 0 || partid >= xp_max_npartitions);

        spin_lock_irqsave(&part->act_lock, irq_flags);

        if (part->act_state == XPC_P_AS_DEACTIVATING) {
                part->act_state = XPC_P_AS_INACTIVE;
                spin_unlock_irqrestore(&part->act_lock, irq_flags);
                part->remote_rp_pa = 0;
                return 0;
        }

        /* indicate the thread is activating */
        DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ);
        part->act_state = XPC_P_AS_ACTIVATING;

        XPC_SET_REASON(part, 0, 0);
        spin_unlock_irqrestore(&part->act_lock, irq_flags);

        dev_dbg(xpc_part, "activating partition %d\n", partid);

        xpc_arch_ops.allow_hb(partid);

        if (xpc_setup_ch_structures(part) == xpSuccess) {
                (void)xpc_part_ref(part);        /* this will always succeed */

                if (xpc_arch_ops.make_first_contact(part) == xpSuccess) {
                        xpc_mark_partition_active(part);
                        xpc_channel_mgr(part);
                        /* won't return until partition is deactivating */
                }

                xpc_part_deref(part);
                xpc_teardown_ch_structures(part);
        }

        xpc_arch_ops.disallow_hb(partid);
        xpc_mark_partition_inactive(part);

        if (part->reason == xpReactivating) {
                /* interrupting ourselves results in activating partition */
                xpc_arch_ops.request_partition_reactivation(part);
        }

        return 0;
}

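/*
 * Request activation of the specified partition: mark it as
 * activation-requested and spawn the kthread that will run xpc_activating()
 * on its behalf. If the kthread cannot be created, the partition is returned
 * to the inactive state.
 */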
void
xpc_activate_partition(struct xpc_partition *part)
{
        short partid = XPC_PARTID(part);
        unsigned long irq_flags;
        struct task_struct *kthread;

        spin_lock_irqsave(&part->act_lock, irq_flags);

        DBUG_ON(part->act_state != XPC_P_AS_INACTIVE);

        part->act_state = XPC_P_AS_ACTIVATION_REQ;
        XPC_SET_REASON(part, xpCloneKThread, __LINE__);

        spin_unlock_irqrestore(&part->act_lock, irq_flags);

        kthread = kthread_run(xpc_activating, (void *)((u64)partid), "xpc%02d",
                              partid);
        if (IS_ERR(kthread)) {
                spin_lock_irqsave(&part->act_lock, irq_flags);
                part->act_state = XPC_P_AS_INACTIVE;
                XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__);
                spin_unlock_irqrestore(&part->act_lock, irq_flags);
        }
}

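/*
 * Wake up to 'needed' kthreads that are idle on the specified channel and,
 * if that is not enough, create additional ones up to the channel's
 * kthreads_assigned_limit.
 */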
void
xpc_activate_kthreads(struct xpc_channel *ch, int needed)
{
        int idle = atomic_read(&ch->kthreads_idle);
        int assigned = atomic_read(&ch->kthreads_assigned);
        int wakeup;

        DBUG_ON(needed <= 0);

        if (idle > 0) {
                wakeup = (needed > idle) ? idle : needed;
                needed -= wakeup;

                dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, "
                        "channel=%d\n", wakeup, ch->partid, ch->number);

                /* only wakeup the requested number of kthreads */
                wake_up_nr(&ch->idle_wq, wakeup);
        }

        if (needed <= 0)
                return;

        if (needed + assigned > ch->kthreads_assigned_limit) {
                needed = ch->kthreads_assigned_limit - assigned;
                if (needed <= 0)
                        return;
        }

        dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
                needed, ch->partid, ch->number);

        xpc_create_kthreads(ch, needed, 0);
}

/*
 * This function is where XPC's kthreads wait for messages to deliver.
 */
static void
xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
{
        int (*n_of_deliverable_payloads) (struct xpc_channel *) =
                xpc_arch_ops.n_of_deliverable_payloads;

        do {
                /* deliver messages to their intended recipients */

                while (n_of_deliverable_payloads(ch) > 0 &&
                       !(ch->flags & XPC_C_DISCONNECTING)) {
                        xpc_deliver_payload(ch);
                }

                if (atomic_inc_return(&ch->kthreads_idle) >
                    ch->kthreads_idle_limit) {
                        /* too many idle kthreads on this channel */
                        atomic_dec(&ch->kthreads_idle);
                        break;
                }

                dev_dbg(xpc_chan, "idle kthread calling "
                        "wait_event_interruptible_exclusive()\n");

                (void)wait_event_interruptible_exclusive(ch->idle_wq,
                                (n_of_deliverable_payloads(ch) > 0 ||
                                 (ch->flags & XPC_C_DISCONNECTING)));

                atomic_dec(&ch->kthreads_idle);

        } while (!(ch->flags & XPC_C_DISCONNECTING));
}

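/*
 * Entry point for each channel kthread created by xpc_create_kthreads().
 * It makes the connected callout if that has not yet been done, delivers
 * payloads until the channel starts disconnecting, then makes the
 * disconnecting callout and drops the references taken on its behalf.
 */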
static int
xpc_kthread_start(void *args)
{
        short partid = XPC_UNPACK_ARG1(args);
        u16 ch_number = XPC_UNPACK_ARG2(args);
        struct xpc_partition *part = &xpc_partitions[partid];
        struct xpc_channel *ch;
        int n_needed;
        unsigned long irq_flags;
        int (*n_of_deliverable_payloads) (struct xpc_channel *) =
                xpc_arch_ops.n_of_deliverable_payloads;

        dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n",
                partid, ch_number);

        ch = &part->channels[ch_number];

        if (!(ch->flags & XPC_C_DISCONNECTING)) {

                /* let registerer know that connection has been established */

                spin_lock_irqsave(&ch->lock, irq_flags);
                if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) {
                        ch->flags |= XPC_C_CONNECTEDCALLOUT;
                        spin_unlock_irqrestore(&ch->lock, irq_flags);

                        xpc_connected_callout(ch);

                        spin_lock_irqsave(&ch->lock, irq_flags);
                        ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE;
                        spin_unlock_irqrestore(&ch->lock, irq_flags);

                        /*
                         * It is possible that while the callout was being
                         * made the remote partition sent some messages.
                         * If that is the case, we may need to activate
                         * additional kthreads to help deliver them. We only
                         * need one less than total #of messages to deliver.
                         */
                        n_needed = n_of_deliverable_payloads(ch) - 1;
                        if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING))
                                xpc_activate_kthreads(ch, n_needed);

                } else {
                        spin_unlock_irqrestore(&ch->lock, irq_flags);
                }

                xpc_kthread_waitmsgs(part, ch);
        }

        /* let registerer know that connection is disconnecting */

        spin_lock_irqsave(&ch->lock, irq_flags);
        if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
            !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
                ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
                spin_unlock_irqrestore(&ch->lock, irq_flags);

                xpc_disconnect_callout(ch, xpDisconnecting);

                spin_lock_irqsave(&ch->lock, irq_flags);
                ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
        }
        spin_unlock_irqrestore(&ch->lock, irq_flags);

        if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
            atomic_dec_return(&part->nchannels_engaged) == 0) {
                xpc_arch_ops.indicate_partition_disengaged(part);
        }

        xpc_msgqueue_deref(ch);

        dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n",
                partid, ch_number);

        xpc_part_deref(part);
        return 0;
}

/*
 * For each partition that XPC has established communications with, there is
 * a minimum of one kernel thread assigned to perform any operation that
 * may potentially sleep or block (basically the callouts to the asynchronous
 * functions registered via xpc_connect()).
 *
 * Additional kthreads are created and destroyed by XPC as the workload
 * demands.
 *
 * A kthread is assigned to one of the active channels that exists for a given
 * partition.
 */
void
xpc_create_kthreads(struct xpc_channel *ch, int needed,
                    int ignore_disconnecting)
{
        unsigned long irq_flags;
        u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
        struct xpc_partition *part = &xpc_partitions[ch->partid];
        struct task_struct *kthread;
        void (*indicate_partition_disengaged) (struct xpc_partition *) =
                xpc_arch_ops.indicate_partition_disengaged;

        while (needed-- > 0) {

                /*
                 * The following is done on behalf of the newly created
                 * kthread. That kthread is responsible for doing the
                 * counterpart to the following before it exits.
                 */
                if (ignore_disconnecting) {
                        if (!atomic_inc_not_zero(&ch->kthreads_assigned)) {
                                /* kthreads assigned had gone to zero */
                                BUG_ON(!(ch->flags &
                                         XPC_C_DISCONNECTINGCALLOUT_MADE));
                                break;
                        }

                } else if (ch->flags & XPC_C_DISCONNECTING) {
                        break;

                } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
                           atomic_inc_return(&part->nchannels_engaged) == 1) {
                        xpc_arch_ops.indicate_partition_engaged(part);
                }
                (void)xpc_part_ref(part);
                xpc_msgqueue_ref(ch);

                kthread = kthread_run(xpc_kthread_start, (void *)args,
                                      "xpc%02dc%d", ch->partid, ch->number);
                if (IS_ERR(kthread)) {
                        /* the fork failed */

                        /*
                         * NOTE: if (ignore_disconnecting &&
                         * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true,
                         * then we'll deadlock if all other kthreads assigned
                         * to this channel are blocked in the channel's
                         * registerer, because the only thing that will unblock
                         * them is the xpDisconnecting callout that this
                         * failed kthread_run() would have made.
                         */

                        if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
                            atomic_dec_return(&part->nchannels_engaged) == 0) {
                                indicate_partition_disengaged(part);
                        }
                        xpc_msgqueue_deref(ch);
                        xpc_part_deref(part);

                        if (atomic_read(&ch->kthreads_assigned) <
                            ch->kthreads_idle_limit) {
                                /*
                                 * Flag this as an error only if we have an
                                 * insufficient #of kthreads for the channel
                                 * to function.
                                 */
                                spin_lock_irqsave(&ch->lock, irq_flags);
                                XPC_DISCONNECT_CHANNEL(ch, xpLackOfResources,
                                                       &irq_flags);
                                spin_unlock_irqrestore(&ch->lock, irq_flags);
                        }
                        break;
                }
        }
}

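/*
 * Wait for the disconnect of channel 'ch_number' to complete on every
 * partition it was connected to, then reprocess any channel control flags
 * that were delayed while the disconnect was in progress.
 */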
void
xpc_disconnect_wait(int ch_number)
{
        unsigned long irq_flags;
        short partid;
        struct xpc_partition *part;
        struct xpc_channel *ch;
        int wakeup_channel_mgr;

        /* now wait for all callouts to the caller's function to cease */
        for (partid = 0; partid < xp_max_npartitions; partid++) {
                part = &xpc_partitions[partid];

                if (!xpc_part_ref(part))
                        continue;

                ch = &part->channels[ch_number];

                if (!(ch->flags & XPC_C_WDISCONNECT)) {
                        xpc_part_deref(part);
                        continue;
                }

                wait_for_completion(&ch->wdisconnect_wait);

                spin_lock_irqsave(&ch->lock, irq_flags);
                DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
                wakeup_channel_mgr = 0;

                if (ch->delayed_chctl_flags) {
                        if (part->act_state != XPC_P_AS_DEACTIVATING) {
                                spin_lock(&part->chctl_lock);
                                part->chctl.flags[ch->number] |=
                                    ch->delayed_chctl_flags;
                                spin_unlock(&part->chctl_lock);
                                wakeup_channel_mgr = 1;
                        }
                        ch->delayed_chctl_flags = 0;
                }

                ch->flags &= ~XPC_C_WDISCONNECT;
                spin_unlock_irqrestore(&ch->lock, irq_flags);

                if (wakeup_channel_mgr)
                        xpc_wakeup_channel_mgr(part);

                xpc_part_deref(part);
        }
}

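/*
 * Allocate and minimally initialize the xpc_partitions[] array, then let the
 * architecture-specific code finish the per-partition setup.
 */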
static int
xpc_setup_partitions(void)
{
        short partid;
        struct xpc_partition *part;

        xpc_partitions = kcalloc(xp_max_npartitions,
                                 sizeof(struct xpc_partition),
                                 GFP_KERNEL);
        if (xpc_partitions == NULL) {
                dev_err(xpc_part, "can't get memory for partition structure\n");
                return -ENOMEM;
        }

        /*
         * The first few fields of each entry of xpc_partitions[] need to
         * be initialized now so that calls to xpc_connect() and
         * xpc_disconnect() can be made prior to the activation of any remote
         * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
         * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
         * PARTITION HAS BEEN ACTIVATED.
         */
        for (partid = 0; partid < xp_max_npartitions; partid++) {
                part = &xpc_partitions[partid];

                DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));

                part->activate_IRQ_rcvd = 0;
                spin_lock_init(&part->act_lock);
                part->act_state = XPC_P_AS_INACTIVE;
                XPC_SET_REASON(part, 0, 0);

                timer_setup(&part->disengage_timer,
                            xpc_timeout_partition_disengage, 0);

                part->setup_state = XPC_P_SS_UNSET;
                init_waitqueue_head(&part->teardown_wq);
                atomic_set(&part->references, 0);
        }

        return xpc_arch_ops.setup_partitions();
}

static void
xpc_teardown_partitions(void)
{
        xpc_arch_ops.teardown_partitions();
        kfree(xpc_partitions);
}

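/*
 * Shut XPC down: stop the heartbeat and discovery threads, deactivate all
 * partitions and wait (up to the disengage timelimit) for them to disengage,
 * then tear down the reserved page, notifiers, sysctl entries and partition
 * structures.
 */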
static void
xpc_do_exit(enum xp_retval reason)
{
        short partid;
        int active_part_count, printed_waiting_msg = 0;
        struct xpc_partition *part;
        unsigned long printmsg_time, disengage_timeout = 0;

        /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
        DBUG_ON(xpc_exiting == 1);

        /*
         * Let the heartbeat checker thread and the discovery thread
         * (if one is running) know that they should exit. Also wake up
         * the heartbeat checker thread in case it's sleeping.
         */
        xpc_exiting = 1;
        wake_up_interruptible(&xpc_activate_IRQ_wq);

        /* wait for the discovery thread to exit */
        wait_for_completion(&xpc_discovery_exited);

        /* wait for the heartbeat checker thread to exit */
        wait_for_completion(&xpc_hb_checker_exited);

        /* sleep for a 1/3 of a second or so */
        (void)msleep_interruptible(300);

        /* wait for all partitions to become inactive */

        printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
        xpc_disengage_timedout = 0;

        do {
                active_part_count = 0;

                for (partid = 0; partid < xp_max_npartitions; partid++) {
                        part = &xpc_partitions[partid];

                        if (xpc_partition_disengaged(part) &&
                            part->act_state == XPC_P_AS_INACTIVE) {
                                continue;
                        }

                        active_part_count++;

                        XPC_DEACTIVATE_PARTITION(part, reason);

                        if (part->disengage_timeout > disengage_timeout)
                                disengage_timeout = part->disengage_timeout;
                }

                if (xpc_arch_ops.any_partition_engaged()) {
                        if (time_is_before_jiffies(printmsg_time)) {
                                dev_info(xpc_part, "waiting for remote "
                                         "partitions to deactivate, timeout in "
                                         "%ld seconds\n", (disengage_timeout -
                                         jiffies) / HZ);
                                printmsg_time = jiffies +
                                    (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
                                printed_waiting_msg = 1;
                        }

                } else if (active_part_count > 0) {
                        if (printed_waiting_msg) {
                                dev_info(xpc_part, "waiting for local partition"
                                         " to deactivate\n");
                                printed_waiting_msg = 0;
                        }

                } else {
                        if (!xpc_disengage_timedout) {
                                dev_info(xpc_part, "all partitions have "
                                         "deactivated\n");
                        }
                        break;
                }

                /* sleep for a 1/3 of a second or so */
                (void)msleep_interruptible(300);

        } while (1);

        DBUG_ON(xpc_arch_ops.any_partition_engaged());

        xpc_teardown_rsvd_page();

        if (reason == xpUnloading) {
                (void)unregister_die_notifier(&xpc_die_notifier);
                (void)unregister_reboot_notifier(&xpc_reboot_notifier);
        }

        /* clear the interface to XPC's functions */
        xpc_clear_interface();

        if (xpc_sysctl)
                unregister_sysctl_table(xpc_sysctl);

        xpc_teardown_partitions();

        if (is_uv_system())
                xpc_exit_uv();
}

/*
 * This function is called when the system is being rebooted.
 */
static int
xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
{
        enum xp_retval reason;

        switch (event) {
        case SYS_RESTART:
                reason = xpSystemReboot;
                break;
        case SYS_HALT:
                reason = xpSystemHalt;
                break;
        case SYS_POWER_OFF:
                reason = xpSystemPoweroff;
                break;
        default:
                reason = xpSystemGoingDown;
        }

        xpc_do_exit(reason);
        return NOTIFY_DONE;
}

/* Used to only allow one cpu to complete disconnect */
static unsigned int xpc_die_disconnecting;

/*
 * Notify other partitions to deactivate from us by first disengaging from all
 * references to our memory.
 */
static void
xpc_die_deactivate(void)
{
        struct xpc_partition *part;
        short partid;
        int any_engaged;
        long keep_waiting;
        long wait_to_print;

        if (cmpxchg(&xpc_die_disconnecting, 0, 1))
                return;

        /* keep xpc_hb_checker thread from doing anything (just in case) */
        xpc_exiting = 1;

        xpc_arch_ops.disallow_all_hbs();        /* indicate we're deactivated */

        for (partid = 0; partid < xp_max_npartitions; partid++) {
                part = &xpc_partitions[partid];

                if (xpc_arch_ops.partition_engaged(partid) ||
                    part->act_state != XPC_P_AS_INACTIVE) {
                        xpc_arch_ops.request_partition_deactivation(part);
                        xpc_arch_ops.indicate_partition_disengaged(part);
                }
        }

        /*
         * Though we requested that all other partitions deactivate from us,
         * we only wait until they've all disengaged or we've reached the
         * defined timelimit.
         *
         * Given that one iteration through the following while-loop takes
         * approximately 200 microseconds, calculate the #of loops to take
         * before bailing and the #of loops before printing a waiting message.
         */
        keep_waiting = xpc_disengage_timelimit * 1000 * 5;
        wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5;

        while (1) {
                any_engaged = xpc_arch_ops.any_partition_engaged();
                if (!any_engaged) {
                        dev_info(xpc_part, "all partitions have deactivated\n");
                        break;
                }

                if (!keep_waiting--) {
                        for (partid = 0; partid < xp_max_npartitions;
                             partid++) {
                                if (xpc_arch_ops.partition_engaged(partid)) {
                                        dev_info(xpc_part, "deactivate from "
                                                 "remote partition %d timed "
                                                 "out\n", partid);
                                }
                        }
                        break;
                }

                if (!wait_to_print--) {
                        dev_info(xpc_part, "waiting for remote partitions to "
                                 "deactivate, timeout in %ld seconds\n",
                                 keep_waiting / (1000 * 5));
                        wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL *
                            1000 * 5;
                }

                udelay(200);
        }
}

/*
 * This function is called when the system is being restarted or halted due
 * to some sort of system failure. If this is the case we need to notify the
 * other partitions to disengage from all references to our memory.
 * This function can also be called when our heartbeater could be offlined
 * for a time. In this case we need to notify other partitions to not worry
 * about the lack of a heartbeat.
 */
static int
xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args)
{
#ifdef CONFIG_IA64                /* !!! temporary kludge */
        switch (event) {
        case DIE_MACHINE_RESTART:
        case DIE_MACHINE_HALT:
                xpc_die_deactivate();
                break;

        case DIE_KDEBUG_ENTER:
                /* Should lack of heartbeat be ignored by other partitions? */
                if (!xpc_kdebug_ignore)
                        break;

                fallthrough;
        case DIE_MCA_MONARCH_ENTER:
        case DIE_INIT_MONARCH_ENTER:
                xpc_arch_ops.offline_heartbeat();
                break;

        case DIE_KDEBUG_LEAVE:
                /* Is lack of heartbeat being ignored by other partitions? */
                if (!xpc_kdebug_ignore)
                        break;

                fallthrough;
        case DIE_MCA_MONARCH_LEAVE:
        case DIE_INIT_MONARCH_LEAVE:
                xpc_arch_ops.online_heartbeat();
                break;
        }
#else
        struct die_args *die_args = _die_args;

1197*4882a593Smuzhiyun switch (event) {
1198*4882a593Smuzhiyun case DIE_TRAP:
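		/* a double fault (X86_TRAP_DF) means this partition is dying */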
1199*4882a593Smuzhiyun if (die_args->trapnr == X86_TRAP_DF)
1200*4882a593Smuzhiyun xpc_die_deactivate();
1201*4882a593Smuzhiyun
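		/*
		 * X86_TRAP_MF and X86_TRAP_XF are the x87 and SIMD
		 * floating-point exceptions; deactivate only if they
		 * were raised while in kernel mode.
		 */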
1202*4882a593Smuzhiyun if (((die_args->trapnr == X86_TRAP_MF) ||
1203*4882a593Smuzhiyun (die_args->trapnr == X86_TRAP_XF)) &&
1204*4882a593Smuzhiyun !user_mode(die_args->regs))
1205*4882a593Smuzhiyun xpc_die_deactivate();
1206*4882a593Smuzhiyun
1207*4882a593Smuzhiyun break;
1208*4882a593Smuzhiyun case DIE_INT3:
1209*4882a593Smuzhiyun case DIE_DEBUG:
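		/* breakpoint/debug traps are recoverable; nothing to do */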
1210*4882a593Smuzhiyun break;
1211*4882a593Smuzhiyun case DIE_OOPS:
1212*4882a593Smuzhiyun case DIE_GPF:
1213*4882a593Smuzhiyun default:
1214*4882a593Smuzhiyun xpc_die_deactivate();
1215*4882a593Smuzhiyun }
1216*4882a593Smuzhiyun #endif
1217*4882a593Smuzhiyun
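	/* never veto the event; let the rest of the die notifier chain run */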
1218*4882a593Smuzhiyun return NOTIFY_DONE;
1219*4882a593Smuzhiyun }
1220*4882a593Smuzhiyun
1221*4882a593Smuzhiyun static int __init
1222*4882a593Smuzhiyun xpc_init(void)
1223*4882a593Smuzhiyun {
1224*4882a593Smuzhiyun int ret;
1225*4882a593Smuzhiyun struct task_struct *kthread;
1226*4882a593Smuzhiyun
1227*4882a593Smuzhiyun dev_set_name(xpc_part, "part");
1228*4882a593Smuzhiyun dev_set_name(xpc_chan, "chan");
1229*4882a593Smuzhiyun
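	/* XPC is only supported on UV systems; bail out on anything else */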
1230*4882a593Smuzhiyun if (is_uv_system()) {
1231*4882a593Smuzhiyun ret = xpc_init_uv();
1232*4882a593Smuzhiyun
1233*4882a593Smuzhiyun } else {
1234*4882a593Smuzhiyun ret = -ENODEV;
1235*4882a593Smuzhiyun }
1236*4882a593Smuzhiyun
1237*4882a593Smuzhiyun if (ret != 0)
1238*4882a593Smuzhiyun return ret;
1239*4882a593Smuzhiyun
1240*4882a593Smuzhiyun ret = xpc_setup_partitions();
1241*4882a593Smuzhiyun if (ret != 0) {
1242*4882a593Smuzhiyun dev_err(xpc_part, "can't get memory for partition structure\n");
1243*4882a593Smuzhiyun goto out_1;
1244*4882a593Smuzhiyun }
1245*4882a593Smuzhiyun
1246*4882a593Smuzhiyun xpc_sysctl = register_sysctl_table(xpc_sys_dir);
1247*4882a593Smuzhiyun
1248*4882a593Smuzhiyun /*
1249*4882a593Smuzhiyun * Fill the partition reserved page with the information needed by
1250*4882a593Smuzhiyun * other partitions to discover we are alive and establish initial
1251*4882a593Smuzhiyun * communications.
1252*4882a593Smuzhiyun */
1253*4882a593Smuzhiyun ret = xpc_setup_rsvd_page();
1254*4882a593Smuzhiyun if (ret != 0) {
1255*4882a593Smuzhiyun dev_err(xpc_part, "can't setup our reserved page\n");
1256*4882a593Smuzhiyun goto out_2;
1257*4882a593Smuzhiyun }
1258*4882a593Smuzhiyun
1259*4882a593Smuzhiyun /* add ourselves to the reboot_notifier_list */
1260*4882a593Smuzhiyun ret = register_reboot_notifier(&xpc_reboot_notifier);
1261*4882a593Smuzhiyun if (ret != 0)
1262*4882a593Smuzhiyun dev_warn(xpc_part, "can't register reboot notifier\n");
1263*4882a593Smuzhiyun
1264*4882a593Smuzhiyun /* add ourselves to the die_notifier list */
1265*4882a593Smuzhiyun ret = register_die_notifier(&xpc_die_notifier);
1266*4882a593Smuzhiyun if (ret != 0)
1267*4882a593Smuzhiyun dev_warn(xpc_part, "can't register die notifier\n");
1268*4882a593Smuzhiyun
1269*4882a593Smuzhiyun /*
1270*4882a593Smuzhiyun * The real work-horse behind xpc. This processes incoming
1271*4882a593Smuzhiyun * interrupts and monitors remote heartbeats.
1272*4882a593Smuzhiyun */
1273*4882a593Smuzhiyun kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME);
1274*4882a593Smuzhiyun if (IS_ERR(kthread)) {
1275*4882a593Smuzhiyun dev_err(xpc_part, "failed while forking hb check thread\n");
1276*4882a593Smuzhiyun ret = -EBUSY;
1277*4882a593Smuzhiyun goto out_3;
1278*4882a593Smuzhiyun }
1279*4882a593Smuzhiyun
1280*4882a593Smuzhiyun /*
1281*4882a593Smuzhiyun * Startup a thread that will attempt to discover other partitions to
1282*4882a593Smuzhiyun * activate based on info provided by SAL. This new thread is short
1283*4882a593Smuzhiyun * lived and will exit once discovery is complete.
1284*4882a593Smuzhiyun */
1285*4882a593Smuzhiyun kthread = kthread_run(xpc_initiate_discovery, NULL,
1286*4882a593Smuzhiyun XPC_DISCOVERY_THREAD_NAME);
1287*4882a593Smuzhiyun if (IS_ERR(kthread)) {
1288*4882a593Smuzhiyun dev_err(xpc_part, "failed while forking discovery thread\n");
1289*4882a593Smuzhiyun
1290*4882a593Smuzhiyun /* mark this new thread as a non-starter */
1291*4882a593Smuzhiyun complete(&xpc_discovery_exited);
1292*4882a593Smuzhiyun
1293*4882a593Smuzhiyun xpc_do_exit(xpUnloading);
1294*4882a593Smuzhiyun return -EBUSY;
1295*4882a593Smuzhiyun }
1296*4882a593Smuzhiyun
1297*4882a593Smuzhiyun /* set the interface to point at XPC's functions */
1298*4882a593Smuzhiyun xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
1299*4882a593Smuzhiyun xpc_initiate_send, xpc_initiate_send_notify,
1300*4882a593Smuzhiyun xpc_initiate_received, xpc_initiate_partid_to_nasids);
1301*4882a593Smuzhiyun
1302*4882a593Smuzhiyun return 0;
1303*4882a593Smuzhiyun
1304*4882a593Smuzhiyun /* initialization was not successful */
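	/* tear down, in reverse order, whatever was successfully set up above */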
1305*4882a593Smuzhiyun out_3:
1306*4882a593Smuzhiyun xpc_teardown_rsvd_page();
1307*4882a593Smuzhiyun
1308*4882a593Smuzhiyun (void)unregister_die_notifier(&xpc_die_notifier);
1309*4882a593Smuzhiyun (void)unregister_reboot_notifier(&xpc_reboot_notifier);
1310*4882a593Smuzhiyun out_2:
1311*4882a593Smuzhiyun if (xpc_sysctl)
1312*4882a593Smuzhiyun unregister_sysctl_table(xpc_sysctl);
1313*4882a593Smuzhiyun
1314*4882a593Smuzhiyun xpc_teardown_partitions();
1315*4882a593Smuzhiyun out_1:
1316*4882a593Smuzhiyun if (is_uv_system())
1317*4882a593Smuzhiyun xpc_exit_uv();
1318*4882a593Smuzhiyun return ret;
1319*4882a593Smuzhiyun }
1320*4882a593Smuzhiyun
1321*4882a593Smuzhiyun module_init(xpc_init);
1322*4882a593Smuzhiyun
1323*4882a593Smuzhiyun static void __exit
1324*4882a593Smuzhiyun xpc_exit(void)
1325*4882a593Smuzhiyun {
1326*4882a593Smuzhiyun xpc_do_exit(xpUnloading);
1327*4882a593Smuzhiyun }
1328*4882a593Smuzhiyun
1329*4882a593Smuzhiyun module_exit(xpc_exit);
1330*4882a593Smuzhiyun
1331*4882a593Smuzhiyun MODULE_AUTHOR("Silicon Graphics, Inc.");
1332*4882a593Smuzhiyun MODULE_DESCRIPTION("Cross Partition Communication (XPC) support");
1333*4882a593Smuzhiyun MODULE_LICENSE("GPL");
1334*4882a593Smuzhiyun
1335*4882a593Smuzhiyun module_param(xpc_hb_interval, int, 0);
1336*4882a593Smuzhiyun MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between "
1337*4882a593Smuzhiyun "heartbeat increments.");
1338*4882a593Smuzhiyun
1339*4882a593Smuzhiyun module_param(xpc_hb_check_interval, int, 0);
1340*4882a593Smuzhiyun MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
1341*4882a593Smuzhiyun "heartbeat checks.");
1342*4882a593Smuzhiyun
1343*4882a593Smuzhiyun module_param(xpc_disengage_timelimit, int, 0);
1344*4882a593Smuzhiyun MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait "
1345*4882a593Smuzhiyun "for disengage to complete.");
1346*4882a593Smuzhiyun
1347*4882a593Smuzhiyun module_param(xpc_kdebug_ignore, int, 0);
1348*4882a593Smuzhiyun MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
1349*4882a593Smuzhiyun "other partitions when dropping into kdebug.");
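
/*
 * The module parameters above may also be supplied at load time, for
 * example (values are illustrative only, not the defaults):
 *
 *	modprobe xpc xpc_hb_interval=2 xpc_hb_check_interval=20 \
 *		xpc_disengage_timelimit=90 xpc_kdebug_ignore=1
 */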
1350