xref: /OK3568_Linux_fs/u-boot/arch/x86/cpu/sipi_vector.S (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun/*
2*4882a593Smuzhiyun * Copyright (c) 2015 Google, Inc
3*4882a593Smuzhiyun *
4*4882a593Smuzhiyun * SPDX-License-Identifier:	GPL-2.0
5*4882a593Smuzhiyun *
6*4882a593Smuzhiyun * Taken from coreboot file of the same name
7*4882a593Smuzhiyun */
8*4882a593Smuzhiyun
/*
 * The SIPI vector is responsible for initializing the APs in the system. It
 * loads microcode, sets up MSRs, and enables caching before calling into
 * C code
 */
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun#include <asm/global_data.h>
16*4882a593Smuzhiyun#include <asm/msr-index.h>
17*4882a593Smuzhiyun#include <asm/processor.h>
18*4882a593Smuzhiyun#include <asm/processor-flags.h>
19*4882a593Smuzhiyun#include <asm/sipi.h>
20*4882a593Smuzhiyun
/* Segment selectors: GDT entry size times the entry's index in the table */
#define CODE_SEG	(X86_GDT_ENTRY_SIZE * X86_GDT_ENTRY_32BIT_CS)
#define DATA_SEG	(X86_GDT_ENTRY_SIZE * X86_GDT_ENTRY_32BIT_DS)
23*4882a593Smuzhiyun
/*
 * First we have the 16-bit section. Every AP starts executing here.
 * The simple task is to load U-Boot's Global Descriptor Table (GDT) to allow
 * U-Boot's 32-bit code to become visible, then jump to ap_start.
 *
 * Note that this code is copied to RAM below 1MB in mp_init.c, and runs from
 * there, but the 32-bit code (ap_start and onwards) is part of U-Boot and
 * is therefore relocated to the top of RAM with other U-Boot code. This
 * means that for the 16-bit code we must write relocatable code, but for the
 * rest, we can do what we like.
 */
/* CR0 bits forced off / forced on while switching to protected mode */
#define AP16_CR0_CLEAR	(X86_CR0_PG | X86_CR0_AM | X86_CR0_WP | X86_CR0_NE | \
			 X86_CR0_TS | X86_CR0_EM | X86_CR0_MP)
#define AP16_CR0_SET	(X86_CR0_NW | X86_CR0_CD | X86_CR0_PE)

.text
.code16
.globl ap_start16
/*
 * ap_start16 - 16-bit entry point.
 *
 * Loads the GDT described by gdtaddr, sets CR0.PE (with caching disabled)
 * and far-jumps through the pointer stored at ap_start_jmp. All data is
 * addressed as an offset from ap_start16 so that the code works from
 * wherever it has been copied.
 */
ap_start16:
	cli

	xorl	%eax, %eax
	movl	%eax, %cr3		/* Invalidate TLB */

	/* Data lives alongside the code, so ds mirrors cs */
	movw	%cs, %ax
	movw	%ax, %ds

	/* ebx = distance of the GDT descriptor from the start of this code */
	movl	$(gdtaddr - ap_start16), %ebx

	data32 lgdt (%ebx)

	/* Enter protected mode with paging off and the cache disabled */
	movl	%cr0, %eax
	andl	$(~AP16_CR0_CLEAR), %eax
	orl	$AP16_CR0_SET, %eax
	movl	%eax, %cr0

	/* bp = distance of the far pointer from the start of this code */
	movl	$(ap_start_jmp - ap_start16), %eax
	movw	%ax, %bp

	/*
	 * Jump to ap_start within U-Boot. The cs override is needed because
	 * bp-based addressing would otherwise go through ss.
	 */
	data32 cs ljmp	*(%bp)
65*4882a593Smuzhiyun
66*4882a593Smuzhiyun	.align	4
67*4882a593Smuzhiyun.globl sipi_params_16bit
68*4882a593Smuzhiyunsipi_params_16bit:
69*4882a593Smuzhiyun	/* 48-bit far pointer */
70*4882a593Smuzhiyunap_start_jmp:
71*4882a593Smuzhiyun	.long	0		/* offset set to ap_start by U-Boot */
72*4882a593Smuzhiyun	.word	CODE_SEG	/* segment */
73*4882a593Smuzhiyun
74*4882a593Smuzhiyun	.word	0		/* padding */
75*4882a593Smuzhiyungdtaddr:
76*4882a593Smuzhiyun	.word	0 /* limit */
77*4882a593Smuzhiyun	.long	0 /* table */
78*4882a593Smuzhiyun	.word	0 /* unused */
79*4882a593Smuzhiyun
80*4882a593Smuzhiyun.globl ap_start16_code_end
81*4882a593Smuzhiyunap_start16_code_end:
82*4882a593Smuzhiyun
/*
 * ap_start - 32-bit entry point for every AP, reached through the far jump
 * at the end of ap_start16.
 *
 * In order, this code:
 *   1. loads DATA_SEG into ds/es/ss/gs and the special 'fs' selector used
 *      for global_data;
 *   2. loads the IDT through the pointer stored in idt_ptr;
 *   3. atomically increments ap_count - the incremented value becomes this
 *      CPU's number;
 *   4. sets esp to stack_top - cpu_num * stack_size;
 *   5. loads microcode when microcode_ptr is non-zero and the CPU reports
 *      no revision yet;
 *   6. writes the msr_count MSR entries found at msr_table_ptr;
 *   7. enables caching;
 *   8. calls c_handler with the CPU number in eax.
 *
 * Register use: esi carries the CPU number from step 3 up to the call; edi
 * is the microcode pointer in step 5 and the MSR table cursor in step 6.
 */
.globl ap_start
ap_start:
	.code32
	movw	$DATA_SEG, %ax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %ss
	movw	%ax, %gs

	movw	$(X86_GDT_ENTRY_32BIT_FS * X86_GDT_ENTRY_SIZE), %ax
	movw	%ax, %fs

	/* Load the Interrupt descriptor table */
	mov	idt_ptr, %ebx
	lidt	(%ebx)

	/*
	 * Obtain cpu number. cmpxchg stores ecx only if ap_count still equals
	 * eax; otherwise it refreshes eax with the current value and we retry.
	 */
	movl	ap_count, %eax
1:
	movl	%eax, %ecx
	inc	%ecx
	lock cmpxchg %ecx, ap_count
	jnz	1b

	/* Setup stacks for each CPU: esp = stack_top - cpu_num * stack_size */
	movl	stack_size, %eax
	mul	%ecx			/* clobbers edx, reloaded below */
	movl	stack_top, %edx
	subl	%eax, %edx
	mov	%edx, %esp
	/* Save cpu number */
	mov	%ecx, %esi

	/* Determine if one should check microcode versions */
	mov	microcode_ptr, %edi
	test	%edi, %edi
	jz	microcode_done /* Bypass if no microcode exists */

	/*
	 * Get the Microcode version. The revision MSR has to be cleared
	 * before CPUID(1) is executed; only then does the following read
	 * return the current revision (in edx) rather than a stale value.
	 */
	xor	%eax, %eax
	xor	%edx, %edx
	mov	$MSR_IA32_UCODE_REV, %ecx
	wrmsr
	mov	$1, %eax
	cpuid
	mov	$MSR_IA32_UCODE_REV, %ecx
	rdmsr
	/* If something already loaded skip loading again */
	test	%edx, %edx
	jnz	microcode_done

	/* Determine if parallel microcode loading is allowed */
	cmpl	$0xffffffff, microcode_lock
	je	load_microcode

	/* Protect microcode loading: spin until bit 0 was clear when set */
lock_microcode:
	lock btsl $0, microcode_lock
	jc	lock_microcode

load_microcode:
	/* Load new microcode */
	mov	$MSR_IA32_UCODE_WRITE, %ecx
	xor	%edx, %edx
	mov	%edi, %eax
	/*
	 * The microcode pointer is passed in pointing to the header. Adjust
	 * pointer to reflect the payload (header size is 48 bytes)
	 */
	add	$UCODE_HEADER_LEN, %eax
	/* Keep the general-purpose registers intact across the update */
	pusha
	wrmsr
	popa

	/* Release the lock, unless parallel loading means it was never taken */
	cmpl	$0xffffffff, microcode_lock
	je	microcode_done

	xor	%eax, %eax
	mov	%eax, microcode_lock

microcode_done:
	/*
	 * Load MSRs. Each entry in the table consists of:
	 * 0: index,
	 * 4: value[31:0]
	 * 8: value[63:32]
	 * See struct saved_msr in mp_init.c.
	 */
	mov	msr_table_ptr, %edi
	mov	msr_count, %ebx
	test	%ebx, %ebx
	jz	1f
load_msr:
	mov	(%edi), %ecx
	mov	4(%edi), %eax
	mov	8(%edi), %edx
	wrmsr
	add	$12, %edi		/* advance to the next 12-byte entry */
	dec	%ebx
	jnz	load_msr

1:
	/* Enable caching */
	mov	%cr0, %eax
	andl	$(~(X86_CR0_CD | X86_CR0_NW)), %eax
	mov	%eax, %cr0

	/* c_handler(cpu_num) */
	movl	%esi, %eax	/* cpu_num */
	mov	c_handler, %esi
	call	*%esi

	/*
	 * The parameter block follows directly in memory, so do not fall
	 * through into it should c_handler ever return: park the CPU instead.
	 */
.Lap_park:
	hlt
	jmp	.Lap_park
195*4882a593Smuzhiyun
196*4882a593Smuzhiyun	/* This matches struct sipi_param */
197*4882a593Smuzhiyun	.align	4
198*4882a593Smuzhiyun.globl	sipi_params
199*4882a593Smuzhiyunsipi_params:
200*4882a593Smuzhiyunidt_ptr:
201*4882a593Smuzhiyun	.long 0
202*4882a593Smuzhiyunstack_top:
203*4882a593Smuzhiyun	.long 0
204*4882a593Smuzhiyunstack_size:
205*4882a593Smuzhiyun	.long 0
206*4882a593Smuzhiyunmicrocode_lock:
207*4882a593Smuzhiyun	.long 0
208*4882a593Smuzhiyunmicrocode_ptr:
209*4882a593Smuzhiyun	.long 0
210*4882a593Smuzhiyunmsr_table_ptr:
211*4882a593Smuzhiyun	.long 0
212*4882a593Smuzhiyunmsr_count:
213*4882a593Smuzhiyun	.long 0
214*4882a593Smuzhiyunc_handler:
215*4882a593Smuzhiyun	.long 0
216*4882a593Smuzhiyunap_count:
217*4882a593Smuzhiyun	.long 0
218