/*
 * Copyright (c) 2015 Google, Inc
 *
 * SPDX-License-Identifier:	GPL-2.0
 *
 * Taken from coreboot file of the same name
 */

/*
 * The SIPI vector is responsible for initializing the APs in the system. It
 * loads microcode, sets up MSRs, and enables caching before calling into
 * C code.
 *
 * Syntax: GNU as, AT&T operand order, run through the C preprocessor.
 */

#include <asm/global_data.h>
#include <asm/msr-index.h>
#include <asm/processor.h>
#include <asm/processor-flags.h>
#include <asm/sipi.h>

#define CODE_SEG	(X86_GDT_ENTRY_32BIT_CS * X86_GDT_ENTRY_SIZE)
#define DATA_SEG	(X86_GDT_ENTRY_32BIT_DS * X86_GDT_ENTRY_SIZE)

/*
 * First we have the 16-bit section. Every AP starts here.
 * The simple task is to load U-Boot's Global Descriptor Table (GDT) to allow
 * U-Boot's 32-bit code to become visible, then jump to ap_start.
 *
 * Note that this code is copied to RAM below 1MB in mp_init.c, and runs from
 * there, but the 32-bit code (ap_start and onwards) is part of U-Boot and
 * is therefore relocated to the top of RAM with other U-Boot code. This
 * means that for the 16-bit code we must write relocatable code, but for the
 * rest, we can do what we like.
 *
 * Because of that copy, every data reference below is formed as
 * (symbol - ap_start16), i.e. an offset from the start of the copied blob,
 * and is accessed through a segment that was loaded from %cs.
 */
.text
.code16
.globl ap_start16
ap_start16:
	cli
	xorl	%eax, %eax
	movl	%eax, %cr3		/* Invalidate TLB */

	/* setup the data segment */
	movw	%cs, %ax
	movw	%ax, %ds

	/* Use an address relative to the data segment for the GDT */
	movl	$gdtaddr, %ebx
	subl	$ap_start16, %ebx

	data32 lgdt (%ebx)

	/* Enter protected mode with paging off and caching still disabled */
	movl	%cr0, %eax
	andl	$(~(X86_CR0_PG | X86_CR0_AM | X86_CR0_WP | X86_CR0_NE | \
		X86_CR0_TS | X86_CR0_EM | X86_CR0_MP)), %eax
	orl	$(X86_CR0_NW | X86_CR0_CD | X86_CR0_PE), %eax
	movl	%eax, %cr0

	/* %bp = offset of the far pointer within the copied blob */
	movl	$ap_start_jmp, %eax
	subl	$ap_start16, %eax
	movw	%ax, %bp

	/* Jump to ap_start within U-Boot */
data32 cs	ljmp	*(%bp)

	/*
	 * Parameter area for the 16-bit code. It lies between ap_start16 and
	 * ap_start16_code_end, so it is part of the blob described above; the
	 * zero fields are placeholders filled in before the AP runs.
	 */
	.align	4
.globl sipi_params_16bit
sipi_params_16bit:
	/* 48-bit far pointer */
ap_start_jmp:
	.long	0		/* offset set to ap_start by U-Boot */
	.word	CODE_SEG	/* segment */

	.word	0		/* padding */
gdtaddr:
	.word	0		/* limit */
	.long	0		/* table */
	.word	0		/* unused */

.globl ap_start16_code_end
ap_start16_code_end:

/*
 * Set up the data segments and the special 'fs' segment for global_data,
 * then carry straight on with the per-AP setup below.
 *
 * Register roles once the CPU number has been obtained:
 *	%esi - this AP's CPU number (handed to c_handler in %eax at the end)
 *	%edi - microcode pointer, later reused as the MSR table cursor
 *	%ebx - number of MSR table entries still to be written
 */
.globl ap_start
ap_start:
	.code32
	movw	$DATA_SEG, %ax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %ss
	movw	%ax, %gs

	movw	$(X86_GDT_ENTRY_32BIT_FS * X86_GDT_ENTRY_SIZE), %ax
	movw	%ax, %fs

	/* Load the Interrupt descriptor table */
	mov	idt_ptr, %ebx
	lidt	(%ebx)

	/*
	 * Obtain cpu number: atomically increment ap_count and keep the new
	 * value in %ecx. If another AP changed ap_count in the meantime,
	 * cmpxchg reloads %eax with the current value and we try again.
	 */
	movl	ap_count, %eax
1:
	movl	%eax, %ecx
	inc	%ecx
	lock cmpxchg %ecx, ap_count
	jnz	1b

	/* Setup stacks for each CPU: %esp = stack_top - stack_size * cpu_num */
	movl	stack_size, %eax
	mul	%ecx
	movl	stack_top, %edx
	subl	%eax, %edx
	mov	%edx, %esp
	/* Save cpu number */
	mov	%ecx, %esi

	/* Determine if one should check microcode versions */
	mov	microcode_ptr, %edi
	test	%edi, %edi
	jz	microcode_done	/* Bypass if no microcode exists */

	/*
	 * Get the Microcode version
	 * NOTE(review): some revision-read sequences clear this MSR before
	 * the CPUID(1); confirm against the CPU documentation whether that
	 * is required here.
	 */
	mov	$1, %eax
	cpuid
	mov	$MSR_IA32_UCODE_REV, %ecx
	rdmsr
	/* If something already loaded skip loading again */
	test	%edx, %edx
	jnz	microcode_done

	/*
	 * Determine if parallel microcode loading is allowed: a lock word of
	 * 0xffffffff means no locking is used at all.
	 */
	cmpl	$0xffffffff, microcode_lock
	je	load_microcode

	/* Protect microcode loading: spin until bit 0 of the lock is ours */
lock_microcode:
	lock btsl $0, microcode_lock
	jc	lock_microcode

load_microcode:
	/* Load new microcode */
	mov	$MSR_IA32_UCODE_WRITE, %ecx
	xor	%edx, %edx
	mov	%edi, %eax
	/*
	 * The microcode pointer is passed in pointing to the header. Adjust
	 * pointer to reflect the payload (header size is 48 bytes)
	 */
	add	$UCODE_HEADER_LEN, %eax
	/* Keep all general registers intact across the update write */
	pusha
	wrmsr
	popa

	/* Release the microcode lock, unless locking is not in use */
	cmpl	$0xffffffff, microcode_lock
	je	microcode_done

	xor	%eax, %eax
	mov	%eax, microcode_lock

microcode_done:
	/*
	 * Load MSRs. Each entry in the table consists of:
	 * 0: index,
	 * 4: value[31:0]
	 * 8: value[63:32]
	 * See struct saved_msr in mp_init.c.
	 */
	mov	msr_table_ptr, %edi
	mov	msr_count, %ebx
	test	%ebx, %ebx
	jz	1f
load_msr:
	mov	(%edi), %ecx
	mov	4(%edi), %eax
	mov	8(%edi), %edx
	wrmsr
	add	$12, %edi		/* advance to the next 12-byte entry */
	dec	%ebx
	jnz	load_msr

1:
	/* Enable caching */
	mov	%cr0, %eax
	andl	$(~(X86_CR0_CD | X86_CR0_NW)), %eax
	mov	%eax, %cr0

	/* c_handler(cpu_num) */
	movl	%esi, %eax	/* cpu_num */
	mov	c_handler, %esi
	call	*%esi

	/*
	 * If c_handler ever returns, park this AP here instead of running
	 * into the parameter table below and executing it as instructions.
	 */
halt_jump:
	hlt
	jmp	halt_jump

	/* This matches struct sipi_param */
	.align	4
.globl sipi_params
sipi_params:
idt_ptr:
	.long	0
stack_top:
	.long	0
stack_size:
	.long	0
microcode_lock:
	.long	0
microcode_ptr:
	.long	0
msr_table_ptr:
	.long	0
msr_count:
	.long	0
c_handler:
	.long	0
ap_count:
	.long	0