1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * PowerNV setup code.
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Copyright 2011 IBM Corp.
6*4882a593Smuzhiyun */
7*4882a593Smuzhiyun
8*4882a593Smuzhiyun #undef DEBUG
9*4882a593Smuzhiyun
10*4882a593Smuzhiyun #include <linux/cpu.h>
11*4882a593Smuzhiyun #include <linux/errno.h>
12*4882a593Smuzhiyun #include <linux/sched.h>
13*4882a593Smuzhiyun #include <linux/kernel.h>
14*4882a593Smuzhiyun #include <linux/tty.h>
15*4882a593Smuzhiyun #include <linux/reboot.h>
16*4882a593Smuzhiyun #include <linux/init.h>
17*4882a593Smuzhiyun #include <linux/console.h>
18*4882a593Smuzhiyun #include <linux/delay.h>
19*4882a593Smuzhiyun #include <linux/irq.h>
20*4882a593Smuzhiyun #include <linux/seq_file.h>
21*4882a593Smuzhiyun #include <linux/of.h>
22*4882a593Smuzhiyun #include <linux/of_fdt.h>
23*4882a593Smuzhiyun #include <linux/interrupt.h>
24*4882a593Smuzhiyun #include <linux/bug.h>
25*4882a593Smuzhiyun #include <linux/pci.h>
26*4882a593Smuzhiyun #include <linux/cpufreq.h>
27*4882a593Smuzhiyun #include <linux/memblock.h>
28*4882a593Smuzhiyun
29*4882a593Smuzhiyun #include <asm/machdep.h>
30*4882a593Smuzhiyun #include <asm/firmware.h>
31*4882a593Smuzhiyun #include <asm/xics.h>
32*4882a593Smuzhiyun #include <asm/xive.h>
33*4882a593Smuzhiyun #include <asm/opal.h>
34*4882a593Smuzhiyun #include <asm/kexec.h>
35*4882a593Smuzhiyun #include <asm/smp.h>
36*4882a593Smuzhiyun #include <asm/tm.h>
37*4882a593Smuzhiyun #include <asm/setup.h>
38*4882a593Smuzhiyun #include <asm/security_features.h>
39*4882a593Smuzhiyun
40*4882a593Smuzhiyun #include "powernv.h"
41*4882a593Smuzhiyun
42*4882a593Smuzhiyun
fw_feature_is(const char * state,const char * name,struct device_node * fw_features)43*4882a593Smuzhiyun static bool fw_feature_is(const char *state, const char *name,
44*4882a593Smuzhiyun struct device_node *fw_features)
45*4882a593Smuzhiyun {
46*4882a593Smuzhiyun struct device_node *np;
47*4882a593Smuzhiyun bool rc = false;
48*4882a593Smuzhiyun
49*4882a593Smuzhiyun np = of_get_child_by_name(fw_features, name);
50*4882a593Smuzhiyun if (np) {
51*4882a593Smuzhiyun rc = of_property_read_bool(np, state);
52*4882a593Smuzhiyun of_node_put(np);
53*4882a593Smuzhiyun }
54*4882a593Smuzhiyun
55*4882a593Smuzhiyun return rc;
56*4882a593Smuzhiyun }
57*4882a593Smuzhiyun
init_fw_feat_flags(struct device_node * np)58*4882a593Smuzhiyun static void init_fw_feat_flags(struct device_node *np)
59*4882a593Smuzhiyun {
60*4882a593Smuzhiyun if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np))
61*4882a593Smuzhiyun security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
62*4882a593Smuzhiyun
63*4882a593Smuzhiyun if (fw_feature_is("enabled", "fw-bcctrl-serialized", np))
64*4882a593Smuzhiyun security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np))
67*4882a593Smuzhiyun security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
68*4882a593Smuzhiyun
69*4882a593Smuzhiyun if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np))
70*4882a593Smuzhiyun security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
71*4882a593Smuzhiyun
72*4882a593Smuzhiyun if (fw_feature_is("enabled", "fw-l1d-thread-split", np))
73*4882a593Smuzhiyun security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
74*4882a593Smuzhiyun
75*4882a593Smuzhiyun if (fw_feature_is("enabled", "fw-count-cache-disabled", np))
76*4882a593Smuzhiyun security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
77*4882a593Smuzhiyun
78*4882a593Smuzhiyun if (fw_feature_is("enabled", "fw-count-cache-flush-bcctr2,0,0", np))
79*4882a593Smuzhiyun security_ftr_set(SEC_FTR_BCCTR_FLUSH_ASSIST);
80*4882a593Smuzhiyun
81*4882a593Smuzhiyun if (fw_feature_is("enabled", "needs-count-cache-flush-on-context-switch", np))
82*4882a593Smuzhiyun security_ftr_set(SEC_FTR_FLUSH_COUNT_CACHE);
83*4882a593Smuzhiyun
84*4882a593Smuzhiyun /*
85*4882a593Smuzhiyun * The features below are enabled by default, so we instead look to see
86*4882a593Smuzhiyun * if firmware has *disabled* them, and clear them if so.
87*4882a593Smuzhiyun */
88*4882a593Smuzhiyun if (fw_feature_is("disabled", "speculation-policy-favor-security", np))
89*4882a593Smuzhiyun security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np))
92*4882a593Smuzhiyun security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
93*4882a593Smuzhiyun
94*4882a593Smuzhiyun if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np))
95*4882a593Smuzhiyun security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
96*4882a593Smuzhiyun
97*4882a593Smuzhiyun if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np))
98*4882a593Smuzhiyun security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
99*4882a593Smuzhiyun }
100*4882a593Smuzhiyun
/*
 * Configure CPU speculation/side-channel mitigations from the firmware's
 * advertised feature flags, then install the chosen flush/barrier handlers.
 */
static void pnv_setup_security_mitigations(void)
{
	struct device_node *np, *fw_features;
	enum l1d_flush_type type;
	bool enable;

	/* Default to fallback in case fw-features are not available */
	type = L1D_FLUSH_FALLBACK;

	np = of_find_node_by_name(NULL, "ibm,opal");
	fw_features = of_get_child_by_name(np, "fw-features");
	of_node_put(np);

	if (fw_features) {
		init_fw_feat_flags(fw_features);
		of_node_put(fw_features);

		/*
		 * Pick the best available L1D flush mechanism; the ORI30
		 * check runs last so it takes precedence over MTTRIG when
		 * firmware advertises both.
		 */
		if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
			type = L1D_FLUSH_MTTRIG;

		if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
			type = L1D_FLUSH_ORI;
	}

	/*
	 * If we are non-Power9 bare metal, we don't need to flush on kernel
	 * entry or after user access: they fix a P9 specific vulnerability.
	 */
	if (!pvr_version_is(PVR_POWER9)) {
		security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
		security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
	}

	/* RFI flush is wanted only when favouring security AND a flush is needed. */
	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
		 (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \
		  security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));

	setup_rfi_flush(type, enable);
	setup_count_cache_flush();

	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
	setup_entry_flush(enable);

	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
		 security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
	setup_uaccess_flush(enable);

	setup_stf_barrier();
}
151*4882a593Smuzhiyun
pnv_check_guarded_cores(void)152*4882a593Smuzhiyun static void __init pnv_check_guarded_cores(void)
153*4882a593Smuzhiyun {
154*4882a593Smuzhiyun struct device_node *dn;
155*4882a593Smuzhiyun int bad_count = 0;
156*4882a593Smuzhiyun
157*4882a593Smuzhiyun for_each_node_by_type(dn, "cpu") {
158*4882a593Smuzhiyun if (of_property_match_string(dn, "status", "bad") >= 0)
159*4882a593Smuzhiyun bad_count++;
160*4882a593Smuzhiyun };
161*4882a593Smuzhiyun
162*4882a593Smuzhiyun if (bad_count) {
163*4882a593Smuzhiyun printk(" _ _______________\n");
164*4882a593Smuzhiyun pr_cont(" | | / \\\n");
165*4882a593Smuzhiyun pr_cont(" | | | WARNING! |\n");
166*4882a593Smuzhiyun pr_cont(" | | | |\n");
167*4882a593Smuzhiyun pr_cont(" | | | It looks like |\n");
168*4882a593Smuzhiyun pr_cont(" |_| | you have %*d |\n", 3, bad_count);
169*4882a593Smuzhiyun pr_cont(" _ | guarded cores |\n");
170*4882a593Smuzhiyun pr_cont(" (_) \\_______________/\n");
171*4882a593Smuzhiyun }
172*4882a593Smuzhiyun }
173*4882a593Smuzhiyun
/*
 * Platform setup_arch hook: security mitigations, SMP, PCI, NVRAM,
 * power-saving defaults and guarded-core reporting.
 */
static void __init pnv_setup_arch(void)
{
	/* Reboot automatically 10s after a panic. */
	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);

	pnv_setup_security_mitigations();

	/* Initialize SMP */
	pnv_smp_init();

	/* Setup PCI */
	pnv_pci_init();

	/* Setup RTC and NVRAM callbacks */
	if (firmware_has_feature(FW_FEATURE_OPAL))
		opal_nvram_init();

	/* Enable NAP mode */
	powersave_nap = 1;

	pnv_check_guarded_cores();

	/* XXX PMCS */

	pnv_rng_init();
}
199*4882a593Smuzhiyun
pnv_init(void)200*4882a593Smuzhiyun static void __init pnv_init(void)
201*4882a593Smuzhiyun {
202*4882a593Smuzhiyun /*
203*4882a593Smuzhiyun * Initialize the LPC bus now so that legacy serial
204*4882a593Smuzhiyun * ports can be found on it
205*4882a593Smuzhiyun */
206*4882a593Smuzhiyun opal_lpc_init();
207*4882a593Smuzhiyun
208*4882a593Smuzhiyun #ifdef CONFIG_HVC_OPAL
209*4882a593Smuzhiyun if (firmware_has_feature(FW_FEATURE_OPAL))
210*4882a593Smuzhiyun hvc_opal_init_early();
211*4882a593Smuzhiyun else
212*4882a593Smuzhiyun #endif
213*4882a593Smuzhiyun add_preferred_console("hvc", 0, NULL);
214*4882a593Smuzhiyun
215*4882a593Smuzhiyun if (!radix_enabled()) {
216*4882a593Smuzhiyun size_t size = sizeof(struct slb_entry) * mmu_slb_size;
217*4882a593Smuzhiyun int i;
218*4882a593Smuzhiyun
219*4882a593Smuzhiyun /* Allocate per cpu area to save old slb contents during MCE */
220*4882a593Smuzhiyun for_each_possible_cpu(i) {
221*4882a593Smuzhiyun paca_ptrs[i]->mce_faulty_slbs =
222*4882a593Smuzhiyun memblock_alloc_node(size,
223*4882a593Smuzhiyun __alignof__(struct slb_entry),
224*4882a593Smuzhiyun cpu_to_node(i));
225*4882a593Smuzhiyun }
226*4882a593Smuzhiyun }
227*4882a593Smuzhiyun }
228*4882a593Smuzhiyun
pnv_init_IRQ(void)229*4882a593Smuzhiyun static void __init pnv_init_IRQ(void)
230*4882a593Smuzhiyun {
231*4882a593Smuzhiyun /* Try using a XIVE if available, otherwise use a XICS */
232*4882a593Smuzhiyun if (!xive_native_init())
233*4882a593Smuzhiyun xics_init();
234*4882a593Smuzhiyun
235*4882a593Smuzhiyun WARN_ON(!ppc_md.get_irq);
236*4882a593Smuzhiyun }
237*4882a593Smuzhiyun
pnv_show_cpuinfo(struct seq_file * m)238*4882a593Smuzhiyun static void pnv_show_cpuinfo(struct seq_file *m)
239*4882a593Smuzhiyun {
240*4882a593Smuzhiyun struct device_node *root;
241*4882a593Smuzhiyun const char *model = "";
242*4882a593Smuzhiyun
243*4882a593Smuzhiyun root = of_find_node_by_path("/");
244*4882a593Smuzhiyun if (root)
245*4882a593Smuzhiyun model = of_get_property(root, "model", NULL);
246*4882a593Smuzhiyun seq_printf(m, "machine\t\t: PowerNV %s\n", model);
247*4882a593Smuzhiyun if (firmware_has_feature(FW_FEATURE_OPAL))
248*4882a593Smuzhiyun seq_printf(m, "firmware\t: OPAL\n");
249*4882a593Smuzhiyun else
250*4882a593Smuzhiyun seq_printf(m, "firmware\t: BML\n");
251*4882a593Smuzhiyun of_node_put(root);
252*4882a593Smuzhiyun if (radix_enabled())
253*4882a593Smuzhiyun seq_printf(m, "MMU\t\t: Radix\n");
254*4882a593Smuzhiyun else
255*4882a593Smuzhiyun seq_printf(m, "MMU\t\t: Hash\n");
256*4882a593Smuzhiyun }
257*4882a593Smuzhiyun
/*
 * Quiesce the system before a reboot or power-off: silence OPAL
 * notifications, announce pending flash updates, stop secondaries and
 * disable interrupts. Order matters: OPAL events must be shut down
 * before we stop servicing interrupts.
 */
static void pnv_prepare_going_down(void)
{
	/*
	 * Disable all notifiers from OPAL, we can't
	 * service interrupts anymore anyway
	 */
	opal_event_shutdown();

	/* Print flash update message if one is scheduled. */
	opal_flash_update_print_message();

	smp_send_stop();

	hard_irq_disable();
}
273*4882a593Smuzhiyun
/*
 * Restart the machine via OPAL.
 *
 * @cmd selects a reboot flavour ("full", "mpipl", "error", "fast");
 * NULL/empty requests a plain cec-reboot. An unknown or failing flavour
 * is reported and we fall back to a plain cec-reboot. Retries while OPAL
 * is busy, then spins polling OPAL events forever (never returns).
 */
static void __noreturn pnv_restart(char *cmd)
{
	long rc;

	pnv_prepare_going_down();

	do {
		if (!cmd || !strlen(cmd))
			rc = opal_cec_reboot();
		else if (strcmp(cmd, "full") == 0)
			rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL);
		else if (strcmp(cmd, "mpipl") == 0)
			rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, NULL);
		else if (strcmp(cmd, "error") == 0)
			rc = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR, NULL);
		else if (strcmp(cmd, "fast") == 0)
			rc = opal_cec_reboot2(OPAL_REBOOT_FAST, NULL);
		else
			rc = OPAL_UNSUPPORTED;

		if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
			/* Opal is busy wait for some time and retry */
			opal_poll_events(NULL);
			mdelay(10);

		} else if (cmd && rc) {
			/* Unknown error while issuing reboot */
			if (rc == OPAL_UNSUPPORTED)
				pr_err("Unsupported '%s' reboot.\n", cmd);
			else
				pr_err("Unable to issue '%s' reboot. Err=%ld\n",
				       cmd, rc);
			/* Fall back: clear cmd and force one more loop pass. */
			pr_info("Forcing a cec-reboot\n");
			cmd = NULL;
			rc = OPAL_BUSY;

		} else if (rc != OPAL_SUCCESS) {
			/* Unknown error while issuing cec-reboot */
			pr_err("Unable to reboot. Err=%ld\n", rc);
		}

	} while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT);

	/* Reboot accepted (or unrecoverable): poll until firmware acts. */
	for (;;)
		opal_poll_events(NULL);
}
320*4882a593Smuzhiyun
pnv_power_off(void)321*4882a593Smuzhiyun static void __noreturn pnv_power_off(void)
322*4882a593Smuzhiyun {
323*4882a593Smuzhiyun long rc = OPAL_BUSY;
324*4882a593Smuzhiyun
325*4882a593Smuzhiyun pnv_prepare_going_down();
326*4882a593Smuzhiyun
327*4882a593Smuzhiyun while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
328*4882a593Smuzhiyun rc = opal_cec_power_down(0);
329*4882a593Smuzhiyun if (rc == OPAL_BUSY_EVENT)
330*4882a593Smuzhiyun opal_poll_events(NULL);
331*4882a593Smuzhiyun else
332*4882a593Smuzhiyun mdelay(10);
333*4882a593Smuzhiyun }
334*4882a593Smuzhiyun for (;;)
335*4882a593Smuzhiyun opal_poll_events(NULL);
336*4882a593Smuzhiyun }
337*4882a593Smuzhiyun
/* Halt is implemented as a full power-off on PowerNV. */
static void __noreturn pnv_halt(void)
{
	pnv_power_off();
}
342*4882a593Smuzhiyun
/*
 * Intentionally empty progress callback: PowerNV has no boot progress
 * display, but a non-NULL ppc_md.progress keeps callers happy.
 */
static void pnv_progress(char *s, unsigned short hex)
{
}
346*4882a593Smuzhiyun
/*
 * machine_shutdown hook (runs before kexec): tear down PCI state and
 * quiesce OPAL so no firmware activity survives into the next kernel.
 */
static void pnv_shutdown(void)
{
	/* Let the PCI code clear up IODA tables */
	pnv_pci_shutdown();

	/*
	 * Stop OPAL activity: Unregister all OPAL interrupts so they
	 * don't fire up while we kexec and make sure all potentially
	 * DMA'ing ops are complete (such as dump retrieval).
	 */
	opal_shutdown();
}
359*4882a593Smuzhiyun
360*4882a593Smuzhiyun #ifdef CONFIG_KEXEC_CORE
pnv_kexec_wait_secondaries_down(void)361*4882a593Smuzhiyun static void pnv_kexec_wait_secondaries_down(void)
362*4882a593Smuzhiyun {
363*4882a593Smuzhiyun int my_cpu, i, notified = -1;
364*4882a593Smuzhiyun
365*4882a593Smuzhiyun my_cpu = get_cpu();
366*4882a593Smuzhiyun
367*4882a593Smuzhiyun for_each_online_cpu(i) {
368*4882a593Smuzhiyun uint8_t status;
369*4882a593Smuzhiyun int64_t rc, timeout = 1000;
370*4882a593Smuzhiyun
371*4882a593Smuzhiyun if (i == my_cpu)
372*4882a593Smuzhiyun continue;
373*4882a593Smuzhiyun
374*4882a593Smuzhiyun for (;;) {
375*4882a593Smuzhiyun rc = opal_query_cpu_status(get_hard_smp_processor_id(i),
376*4882a593Smuzhiyun &status);
377*4882a593Smuzhiyun if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED)
378*4882a593Smuzhiyun break;
379*4882a593Smuzhiyun barrier();
380*4882a593Smuzhiyun if (i != notified) {
381*4882a593Smuzhiyun printk(KERN_INFO "kexec: waiting for cpu %d "
382*4882a593Smuzhiyun "(physical %d) to enter OPAL\n",
383*4882a593Smuzhiyun i, paca_ptrs[i]->hw_cpu_id);
384*4882a593Smuzhiyun notified = i;
385*4882a593Smuzhiyun }
386*4882a593Smuzhiyun
387*4882a593Smuzhiyun /*
388*4882a593Smuzhiyun * On crash secondaries might be unreachable or hung,
389*4882a593Smuzhiyun * so timeout if we've waited too long
390*4882a593Smuzhiyun * */
391*4882a593Smuzhiyun mdelay(1);
392*4882a593Smuzhiyun if (timeout-- == 0) {
393*4882a593Smuzhiyun printk(KERN_ERR "kexec: timed out waiting for "
394*4882a593Smuzhiyun "cpu %d (physical %d) to enter OPAL\n",
395*4882a593Smuzhiyun i, paca_ptrs[i]->hw_cpu_id);
396*4882a593Smuzhiyun break;
397*4882a593Smuzhiyun }
398*4882a593Smuzhiyun }
399*4882a593Smuzhiyun }
400*4882a593Smuzhiyun }
401*4882a593Smuzhiyun
/*
 * Per-CPU kexec teardown. Secondaries hand themselves back to OPAL;
 * the primary waits for them and then reinitializes CPU state (HILE,
 * MMU modes) so the next kernel starts from a known configuration.
 */
static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
{
	u64 reinit_flags;

	/* Tear down this CPU's interrupt controller state first. */
	if (xive_enabled())
		xive_teardown_cpu();
	else
		xics_kexec_teardown_cpu(secondary);

	/* On OPAL, we return all CPUs to firmware */
	if (!firmware_has_feature(FW_FEATURE_OPAL))
		return;

	if (secondary) {
		/* Return secondary CPUs to firmware on OPAL v3 */
		/* mb() pairs around the state update so the primary sees it. */
		mb();
		get_paca()->kexec_state = KEXEC_STATE_REAL_MODE;
		mb();

		/* Return the CPU to OPAL */
		opal_return_cpu();
	} else {
		/* Primary waits for the secondaries to have reached OPAL */
		pnv_kexec_wait_secondaries_down();

		/* Switch XIVE back to emulation mode */
		if (xive_enabled())
			xive_shutdown();

		/*
		 * We might be running as little-endian - now that interrupts
		 * are disabled, reset the HILE bit to big-endian so we don't
		 * take interrupts in the wrong endian later
		 *
		 * We reinit to enable both radix and hash on P9 to ensure
		 * the mode used by the next kernel is always supported.
		 */
		reinit_flags = OPAL_REINIT_CPUS_HILE_BE;
		if (cpu_has_feature(CPU_FTR_ARCH_300))
			reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX |
				OPAL_REINIT_CPUS_MMU_HASH;
		opal_reinit_cpus(reinit_flags);
	}
}
446*4882a593Smuzhiyun #endif /* CONFIG_KEXEC_CORE */
447*4882a593Smuzhiyun
448*4882a593Smuzhiyun #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
pnv_memory_block_size(void)449*4882a593Smuzhiyun static unsigned long pnv_memory_block_size(void)
450*4882a593Smuzhiyun {
451*4882a593Smuzhiyun /*
452*4882a593Smuzhiyun * We map the kernel linear region with 1GB large pages on radix. For
453*4882a593Smuzhiyun * memory hot unplug to work our memory block size must be at least
454*4882a593Smuzhiyun * this size.
455*4882a593Smuzhiyun */
456*4882a593Smuzhiyun if (radix_enabled())
457*4882a593Smuzhiyun return radix_mem_block_size;
458*4882a593Smuzhiyun else
459*4882a593Smuzhiyun return 256UL * 1024 * 1024;
460*4882a593Smuzhiyun }
461*4882a593Smuzhiyun #endif
462*4882a593Smuzhiyun
/*
 * Install the OPAL-backed machdep callbacks (boot time, reboot/halt,
 * machine-check and HMI handling). Only called when FW_FEATURE_OPAL
 * is present.
 */
static void __init pnv_setup_machdep_opal(void)
{
	ppc_md.get_boot_time = opal_get_boot_time;
	ppc_md.restart = pnv_restart;
	pm_power_off = pnv_power_off;
	ppc_md.halt = pnv_halt;
	/* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
	ppc_md.machine_check_exception = opal_machine_check;
	ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
	/* Prefer the v2 HMI handler when firmware exposes the token. */
	if (opal_check_token(OPAL_HANDLE_HMI2))
		ppc_md.hmi_exception_early = opal_hmi_exception_early2;
	else
		ppc_md.hmi_exception_early = opal_hmi_exception_early;
	ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
}
478*4882a593Smuzhiyun
/*
 * Platform probe: returns 1 (claim the machine) when the device tree is
 * compatible with "ibm,powernv", 0 otherwise. Also wires up the OPAL
 * machdep callbacks and runs early platform init.
 */
static int __init pnv_probe(void)
{
	if (!of_machine_is_compatible("ibm,powernv"))
		return 0;

	if (firmware_has_feature(FW_FEATURE_OPAL))
		pnv_setup_machdep_opal();

	pr_debug("PowerNV detected !\n");

	pnv_init();

	return 1;
}
493*4882a593Smuzhiyun
494*4882a593Smuzhiyun #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
 * On POWER9 with OPAL, when the CPU does not already advertise native TM,
 * try to enable Transactional Memory in suspend-disabled mode via
 * opal_reinit_cpus(), then publish the corresponding CPU/user features.
 */
void __init pnv_tm_init(void)
{
	/* Bail unless: OPAL firmware, a P9 CPU, and TM not already present. */
	if (!firmware_has_feature(FW_FEATURE_OPAL) ||
	    !pvr_version_is(PVR_POWER9) ||
	    early_cpu_has_feature(CPU_FTR_TM))
		return;

	if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS)
		return;

	pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n");
	cur_cpu_spec->cpu_features |= CPU_FTR_TM;
	/* Make sure "normal" HTM is off (it should be) */
	cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM;
	/* Turn on no suspend mode, and HTM no SC */
	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND | \
					    PPC_FEATURE2_HTM_NOSC;
	tm_suspend_disabled = true;
}
514*4882a593Smuzhiyun #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
515*4882a593Smuzhiyun
516*4882a593Smuzhiyun /*
517*4882a593Smuzhiyun * Returns the cpu frequency for 'cpu' in Hz. This is used by
518*4882a593Smuzhiyun * /proc/cpuinfo
519*4882a593Smuzhiyun */
pnv_get_proc_freq(unsigned int cpu)520*4882a593Smuzhiyun static unsigned long pnv_get_proc_freq(unsigned int cpu)
521*4882a593Smuzhiyun {
522*4882a593Smuzhiyun unsigned long ret_freq;
523*4882a593Smuzhiyun
524*4882a593Smuzhiyun ret_freq = cpufreq_get(cpu) * 1000ul;
525*4882a593Smuzhiyun
526*4882a593Smuzhiyun /*
527*4882a593Smuzhiyun * If the backend cpufreq driver does not exist,
528*4882a593Smuzhiyun * then fallback to old way of reporting the clockrate.
529*4882a593Smuzhiyun */
530*4882a593Smuzhiyun if (!ret_freq)
531*4882a593Smuzhiyun ret_freq = ppc_proc_freq;
532*4882a593Smuzhiyun return ret_freq;
533*4882a593Smuzhiyun }
534*4882a593Smuzhiyun
pnv_machine_check_early(struct pt_regs * regs)535*4882a593Smuzhiyun static long pnv_machine_check_early(struct pt_regs *regs)
536*4882a593Smuzhiyun {
537*4882a593Smuzhiyun long handled = 0;
538*4882a593Smuzhiyun
539*4882a593Smuzhiyun if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
540*4882a593Smuzhiyun handled = cur_cpu_spec->machine_check_early(regs);
541*4882a593Smuzhiyun
542*4882a593Smuzhiyun return handled;
543*4882a593Smuzhiyun }
544*4882a593Smuzhiyun
/* Machine description registered for the PowerNV (bare-metal) platform. */
define_machine(powernv) {
	.name			= "PowerNV",
	.probe			= pnv_probe,
	.setup_arch		= pnv_setup_arch,
	.init_IRQ		= pnv_init_IRQ,
	.show_cpuinfo		= pnv_show_cpuinfo,
	.get_proc_freq		= pnv_get_proc_freq,
	.progress		= pnv_progress,
	.machine_shutdown	= pnv_shutdown,
	.power_save             = NULL,
	.calibrate_decr		= generic_calibrate_decr,
	.machine_check_early	= pnv_machine_check_early,
#ifdef CONFIG_KEXEC_CORE
	.kexec_cpu_down		= pnv_kexec_cpu_down,
#endif
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
	.memory_block_size	= pnv_memory_block_size,
#endif
};
564