1*4882a593Smuzhiyun /*
2*4882a593Smuzhiyun * Coherency fabric (Aurora) support for Armada 370, 375, 38x and XP
3*4882a593Smuzhiyun * platforms.
4*4882a593Smuzhiyun *
5*4882a593Smuzhiyun * Copyright (C) 2012 Marvell
6*4882a593Smuzhiyun *
7*4882a593Smuzhiyun * Yehuda Yitschak <yehuday@marvell.com>
8*4882a593Smuzhiyun * Gregory Clement <gregory.clement@free-electrons.com>
9*4882a593Smuzhiyun * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
10*4882a593Smuzhiyun *
11*4882a593Smuzhiyun * This file is licensed under the terms of the GNU General Public
12*4882a593Smuzhiyun * License version 2. This program is licensed "as is" without any
13*4882a593Smuzhiyun * warranty of any kind, whether express or implied.
14*4882a593Smuzhiyun *
15*4882a593Smuzhiyun * The Armada 370, 375, 38x and XP SOCs have a coherency fabric which is
16*4882a593Smuzhiyun * responsible for ensuring hardware coherency between all CPUs and between
17*4882a593Smuzhiyun * CPUs and I/O masters. This file initializes the coherency fabric and
18*4882a593Smuzhiyun * supplies basic routines for configuring and controlling hardware coherency
19*4882a593Smuzhiyun */
20*4882a593Smuzhiyun
21*4882a593Smuzhiyun #define pr_fmt(fmt) "mvebu-coherency: " fmt
22*4882a593Smuzhiyun
23*4882a593Smuzhiyun #include <linux/kernel.h>
24*4882a593Smuzhiyun #include <linux/init.h>
25*4882a593Smuzhiyun #include <linux/of_address.h>
26*4882a593Smuzhiyun #include <linux/io.h>
27*4882a593Smuzhiyun #include <linux/smp.h>
28*4882a593Smuzhiyun #include <linux/dma-map-ops.h>
29*4882a593Smuzhiyun #include <linux/platform_device.h>
30*4882a593Smuzhiyun #include <linux/slab.h>
31*4882a593Smuzhiyun #include <linux/mbus.h>
32*4882a593Smuzhiyun #include <linux/pci.h>
33*4882a593Smuzhiyun #include <asm/smp_plat.h>
34*4882a593Smuzhiyun #include <asm/cacheflush.h>
35*4882a593Smuzhiyun #include <asm/mach/map.h>
36*4882a593Smuzhiyun #include <asm/dma-mapping.h>
37*4882a593Smuzhiyun #include "coherency.h"
38*4882a593Smuzhiyun #include "mvebu-soc-id.h"
39*4882a593Smuzhiyun
/*
 * Physical base of the coherency fabric; read by secondary CPUs (in
 * coherency_ll.S) before they join the fabric, i.e. before they are
 * coherent with the boot CPU cache.
 */
unsigned long coherency_phys_base;
void __iomem *coherency_base;			/* fabric control registers */
static void __iomem *coherency_cpu_base;	/* per-CPU fabric registers */
static void __iomem *cpu_config_base;		/* Armada XP CPU Configuration regs */

/* Coherency fabric registers */
#define IO_SYNC_BARRIER_CTL_OFFSET		   0x0

/* Coherency fabric flavours, selected from the DT compatible string. */
enum {
	COHERENCY_FABRIC_TYPE_NONE,
	COHERENCY_FABRIC_TYPE_ARMADA_370_XP,
	COHERENCY_FABRIC_TYPE_ARMADA_375,
	COHERENCY_FABRIC_TYPE_ARMADA_380,
};
54*4882a593Smuzhiyun
/*
 * DT match table mapping each SoC's coherency fabric node to its
 * fabric type; used by coherency_type() and coherency_init().
 */
static const struct of_device_id of_coherency_table[] = {
	{.compatible = "marvell,coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_370_XP },
	{.compatible = "marvell,armada-375-coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_375 },
	{.compatible = "marvell,armada-380-coherency-fabric",
	 .data = (void *) COHERENCY_FABRIC_TYPE_ARMADA_380 },
	{ /* end of list */ },
};
64*4882a593Smuzhiyun
/* Functions defined in coherency_ll.S */
int ll_enable_coherency(void);
void ll_add_cpu_to_smp_group(void);

/* "Shared L2 Present" bit in the Armada XP CPU Configuration register. */
#define CPU_CONFIG_SHARED_L2 BIT(16)
70*4882a593Smuzhiyun
71*4882a593Smuzhiyun /*
72*4882a593Smuzhiyun * Disable the "Shared L2 Present" bit in CPU Configuration register
73*4882a593Smuzhiyun * on Armada XP.
74*4882a593Smuzhiyun *
75*4882a593Smuzhiyun * The "Shared L2 Present" bit affects the "level of coherence" value
76*4882a593Smuzhiyun * in the clidr CP15 register. Cache operation functions such as
77*4882a593Smuzhiyun * "flush all" and "invalidate all" operate on all the cache levels
78*4882a593Smuzhiyun * that included in the defined level of coherence. When HW I/O
79*4882a593Smuzhiyun * coherency is used, this bit causes unnecessary flushes of the L2
80*4882a593Smuzhiyun * cache.
81*4882a593Smuzhiyun */
armada_xp_clear_shared_l2(void)82*4882a593Smuzhiyun static void armada_xp_clear_shared_l2(void)
83*4882a593Smuzhiyun {
84*4882a593Smuzhiyun u32 reg;
85*4882a593Smuzhiyun
86*4882a593Smuzhiyun if (!cpu_config_base)
87*4882a593Smuzhiyun return;
88*4882a593Smuzhiyun
89*4882a593Smuzhiyun reg = readl(cpu_config_base);
90*4882a593Smuzhiyun reg &= ~CPU_CONFIG_SHARED_L2;
91*4882a593Smuzhiyun writel(reg, cpu_config_base);
92*4882a593Smuzhiyun }
93*4882a593Smuzhiyun
mvebu_hwcc_notifier(struct notifier_block * nb,unsigned long event,void * __dev)94*4882a593Smuzhiyun static int mvebu_hwcc_notifier(struct notifier_block *nb,
95*4882a593Smuzhiyun unsigned long event, void *__dev)
96*4882a593Smuzhiyun {
97*4882a593Smuzhiyun struct device *dev = __dev;
98*4882a593Smuzhiyun
99*4882a593Smuzhiyun if (event != BUS_NOTIFY_ADD_DEVICE)
100*4882a593Smuzhiyun return NOTIFY_DONE;
101*4882a593Smuzhiyun set_dma_ops(dev, &arm_coherent_dma_ops);
102*4882a593Smuzhiyun
103*4882a593Smuzhiyun return NOTIFY_OK;
104*4882a593Smuzhiyun }
105*4882a593Smuzhiyun
/* Switches newly added platform devices to coherent DMA ops. */
static struct notifier_block mvebu_hwcc_nb = {
	.notifier_call = mvebu_hwcc_notifier,
};

/* Same handler for the PCI bus; only registered when CONFIG_PCI is set. */
static struct notifier_block mvebu_hwcc_pci_nb __maybe_unused = {
	.notifier_call = mvebu_hwcc_notifier,
};
113*4882a593Smuzhiyun
/*
 * CPU hotplug "starting" callback: clear the Shared L2 bit on each CPU
 * as it comes online. Always reports success.
 */
static int armada_xp_clear_l2_starting(unsigned int cpu)
{
	armada_xp_clear_shared_l2();
	return 0;
}
119*4882a593Smuzhiyun
/*
 * Initialize the Armada 370/XP coherency fabric: publish its physical
 * base for secondary CPUs, map its register windows, and (on Armada XP)
 * arrange for each CPU to clear the "Shared L2 Present" bit as it comes
 * online. Finally joins the current (boot) CPU to the fabric.
 */
static void __init armada_370_coherency_init(struct device_node *np)
{
	struct resource res;
	struct device_node *cpu_config_np;

	of_address_to_resource(np, 0, &res);
	coherency_phys_base = res.start;
	/*
	 * Ensure secondary CPUs will see the updated value,
	 * which they read before they join the coherency
	 * fabric, and therefore before they are coherent with
	 * the boot CPU cache.
	 */
	sync_cache_w(&coherency_phys_base);
	coherency_base = of_iomap(np, 0);
	coherency_cpu_base = of_iomap(np, 1);

	/* The CPU Configuration registers only exist on Armada XP. */
	cpu_config_np = of_find_compatible_node(NULL, NULL,
						"marvell,armada-xp-cpu-config");
	if (!cpu_config_np)
		goto exit;

	cpu_config_base = of_iomap(cpu_config_np, 0);
	if (!cpu_config_base) {
		of_node_put(cpu_config_np);
		goto exit;
	}

	of_node_put(cpu_config_np);

	/* Clear Shared L2 on each CPU (including later hotplugged ones). */
	cpuhp_setup_state_nocalls(CPUHP_AP_ARM_MVEBU_COHERENCY,
				  "arm/mvebu/coherency:starting",
				  armada_xp_clear_l2_starting, NULL);
exit:
	set_cpu_coherent();
}
156*4882a593Smuzhiyun
157*4882a593Smuzhiyun /*
158*4882a593Smuzhiyun * This ioremap hook is used on Armada 375/38x to ensure that all MMIO
159*4882a593Smuzhiyun * areas are mapped as MT_UNCACHED instead of MT_DEVICE. This is
160*4882a593Smuzhiyun * needed for the HW I/O coherency mechanism to work properly without
161*4882a593Smuzhiyun * deadlock.
162*4882a593Smuzhiyun */
163*4882a593Smuzhiyun static void __iomem *
armada_wa_ioremap_caller(phys_addr_t phys_addr,size_t size,unsigned int mtype,void * caller)164*4882a593Smuzhiyun armada_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
165*4882a593Smuzhiyun unsigned int mtype, void *caller)
166*4882a593Smuzhiyun {
167*4882a593Smuzhiyun mtype = MT_UNCACHED;
168*4882a593Smuzhiyun return __arm_ioremap_caller(phys_addr, size, mtype, caller);
169*4882a593Smuzhiyun }
170*4882a593Smuzhiyun
armada_375_380_coherency_init(struct device_node * np)171*4882a593Smuzhiyun static void __init armada_375_380_coherency_init(struct device_node *np)
172*4882a593Smuzhiyun {
173*4882a593Smuzhiyun struct device_node *cache_dn;
174*4882a593Smuzhiyun
175*4882a593Smuzhiyun coherency_cpu_base = of_iomap(np, 0);
176*4882a593Smuzhiyun arch_ioremap_caller = armada_wa_ioremap_caller;
177*4882a593Smuzhiyun pci_ioremap_set_mem_type(MT_UNCACHED);
178*4882a593Smuzhiyun
179*4882a593Smuzhiyun /*
180*4882a593Smuzhiyun * We should switch the PL310 to I/O coherency mode only if
181*4882a593Smuzhiyun * I/O coherency is actually enabled.
182*4882a593Smuzhiyun */
183*4882a593Smuzhiyun if (!coherency_available())
184*4882a593Smuzhiyun return;
185*4882a593Smuzhiyun
186*4882a593Smuzhiyun /*
187*4882a593Smuzhiyun * Add the PL310 property "arm,io-coherent". This makes sure the
188*4882a593Smuzhiyun * outer sync operation is not used, which allows to
189*4882a593Smuzhiyun * workaround the system erratum that causes deadlocks when
190*4882a593Smuzhiyun * doing PCIe in an SMP situation on Armada 375 and Armada
191*4882a593Smuzhiyun * 38x.
192*4882a593Smuzhiyun */
193*4882a593Smuzhiyun for_each_compatible_node(cache_dn, NULL, "arm,pl310-cache") {
194*4882a593Smuzhiyun struct property *p;
195*4882a593Smuzhiyun
196*4882a593Smuzhiyun p = kzalloc(sizeof(*p), GFP_KERNEL);
197*4882a593Smuzhiyun p->name = kstrdup("arm,io-coherent", GFP_KERNEL);
198*4882a593Smuzhiyun of_add_property(cache_dn, p);
199*4882a593Smuzhiyun }
200*4882a593Smuzhiyun }
201*4882a593Smuzhiyun
coherency_type(void)202*4882a593Smuzhiyun static int coherency_type(void)
203*4882a593Smuzhiyun {
204*4882a593Smuzhiyun struct device_node *np;
205*4882a593Smuzhiyun const struct of_device_id *match;
206*4882a593Smuzhiyun int type;
207*4882a593Smuzhiyun
208*4882a593Smuzhiyun /*
209*4882a593Smuzhiyun * The coherency fabric is needed:
210*4882a593Smuzhiyun * - For coherency between processors on Armada XP, so only
211*4882a593Smuzhiyun * when SMP is enabled.
212*4882a593Smuzhiyun * - For coherency between the processor and I/O devices, but
213*4882a593Smuzhiyun * this coherency requires many pre-requisites (write
214*4882a593Smuzhiyun * allocate cache policy, shareable pages, SMP bit set) that
215*4882a593Smuzhiyun * are only meant in SMP situations.
216*4882a593Smuzhiyun *
217*4882a593Smuzhiyun * Note that this means that on Armada 370, there is currently
218*4882a593Smuzhiyun * no way to use hardware I/O coherency, because even when
219*4882a593Smuzhiyun * CONFIG_SMP is enabled, is_smp() returns false due to the
220*4882a593Smuzhiyun * Armada 370 being a single-core processor. To lift this
221*4882a593Smuzhiyun * limitation, we would have to find a way to make the cache
222*4882a593Smuzhiyun * policy set to write-allocate (on all Armada SoCs), and to
223*4882a593Smuzhiyun * set the shareable attribute in page tables (on all Armada
224*4882a593Smuzhiyun * SoCs except the Armada 370). Unfortunately, such decisions
225*4882a593Smuzhiyun * are taken very early in the kernel boot process, at a point
226*4882a593Smuzhiyun * where we don't know yet on which SoC we are running.
227*4882a593Smuzhiyun
228*4882a593Smuzhiyun */
229*4882a593Smuzhiyun if (!is_smp())
230*4882a593Smuzhiyun return COHERENCY_FABRIC_TYPE_NONE;
231*4882a593Smuzhiyun
232*4882a593Smuzhiyun np = of_find_matching_node_and_match(NULL, of_coherency_table, &match);
233*4882a593Smuzhiyun if (!np)
234*4882a593Smuzhiyun return COHERENCY_FABRIC_TYPE_NONE;
235*4882a593Smuzhiyun
236*4882a593Smuzhiyun type = (int) match->data;
237*4882a593Smuzhiyun
238*4882a593Smuzhiyun of_node_put(np);
239*4882a593Smuzhiyun
240*4882a593Smuzhiyun return type;
241*4882a593Smuzhiyun }
242*4882a593Smuzhiyun
set_cpu_coherent(void)243*4882a593Smuzhiyun int set_cpu_coherent(void)
244*4882a593Smuzhiyun {
245*4882a593Smuzhiyun int type = coherency_type();
246*4882a593Smuzhiyun
247*4882a593Smuzhiyun if (type == COHERENCY_FABRIC_TYPE_ARMADA_370_XP) {
248*4882a593Smuzhiyun if (!coherency_base) {
249*4882a593Smuzhiyun pr_warn("Can't make current CPU cache coherent.\n");
250*4882a593Smuzhiyun pr_warn("Coherency fabric is not initialized\n");
251*4882a593Smuzhiyun return 1;
252*4882a593Smuzhiyun }
253*4882a593Smuzhiyun
254*4882a593Smuzhiyun armada_xp_clear_shared_l2();
255*4882a593Smuzhiyun ll_add_cpu_to_smp_group();
256*4882a593Smuzhiyun return ll_enable_coherency();
257*4882a593Smuzhiyun }
258*4882a593Smuzhiyun
259*4882a593Smuzhiyun return 0;
260*4882a593Smuzhiyun }
261*4882a593Smuzhiyun
coherency_available(void)262*4882a593Smuzhiyun int coherency_available(void)
263*4882a593Smuzhiyun {
264*4882a593Smuzhiyun return coherency_type() != COHERENCY_FABRIC_TYPE_NONE;
265*4882a593Smuzhiyun }
266*4882a593Smuzhiyun
coherency_init(void)267*4882a593Smuzhiyun int __init coherency_init(void)
268*4882a593Smuzhiyun {
269*4882a593Smuzhiyun int type = coherency_type();
270*4882a593Smuzhiyun struct device_node *np;
271*4882a593Smuzhiyun
272*4882a593Smuzhiyun np = of_find_matching_node(NULL, of_coherency_table);
273*4882a593Smuzhiyun
274*4882a593Smuzhiyun if (type == COHERENCY_FABRIC_TYPE_ARMADA_370_XP)
275*4882a593Smuzhiyun armada_370_coherency_init(np);
276*4882a593Smuzhiyun else if (type == COHERENCY_FABRIC_TYPE_ARMADA_375 ||
277*4882a593Smuzhiyun type == COHERENCY_FABRIC_TYPE_ARMADA_380)
278*4882a593Smuzhiyun armada_375_380_coherency_init(np);
279*4882a593Smuzhiyun
280*4882a593Smuzhiyun of_node_put(np);
281*4882a593Smuzhiyun
282*4882a593Smuzhiyun return 0;
283*4882a593Smuzhiyun }
284*4882a593Smuzhiyun
coherency_late_init(void)285*4882a593Smuzhiyun static int __init coherency_late_init(void)
286*4882a593Smuzhiyun {
287*4882a593Smuzhiyun if (coherency_available())
288*4882a593Smuzhiyun bus_register_notifier(&platform_bus_type,
289*4882a593Smuzhiyun &mvebu_hwcc_nb);
290*4882a593Smuzhiyun return 0;
291*4882a593Smuzhiyun }
292*4882a593Smuzhiyun
293*4882a593Smuzhiyun postcore_initcall(coherency_late_init);
294*4882a593Smuzhiyun
295*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_PCI)
coherency_pci_init(void)296*4882a593Smuzhiyun static int __init coherency_pci_init(void)
297*4882a593Smuzhiyun {
298*4882a593Smuzhiyun if (coherency_available())
299*4882a593Smuzhiyun bus_register_notifier(&pci_bus_type,
300*4882a593Smuzhiyun &mvebu_hwcc_pci_nb);
301*4882a593Smuzhiyun return 0;
302*4882a593Smuzhiyun }
303*4882a593Smuzhiyun
304*4882a593Smuzhiyun arch_initcall(coherency_pci_init);
305*4882a593Smuzhiyun #endif
306