[U-Boot] [PATCH v2 15/20] x86: Add multi-processor init

Simon Glass sjg at chromium.org
Wed Apr 29 04:25:17 CEST 2015


Most modern x86 CPUs include more than one CPU core. The OS normally requires
that these 'Application Processors' (APs) be brought up by the boot loader.
Add the required support to U-Boot to init additional APs.

Signed-off-by: Simon Glass <sjg at chromium.org>
---

Changes in v2: None

 arch/x86/Kconfig                     |  25 ++
 arch/x86/cpu/Makefile                |   2 +
 arch/x86/cpu/ivybridge/model_206ax.c |   4 +-
 arch/x86/cpu/mp_init.c               | 507 +++++++++++++++++++++++++++++++++++
 arch/x86/cpu/sipi.S                  | 215 +++++++++++++++
 arch/x86/include/asm/mp.h            |  94 +++++++
 arch/x86/include/asm/sipi.h          |  79 ++++++
 arch/x86/include/asm/smm.h           |  14 +
 8 files changed, 938 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/cpu/mp_init.c
 create mode 100644 arch/x86/cpu/sipi.S
 create mode 100644 arch/x86/include/asm/mp.h
 create mode 100644 arch/x86/include/asm/sipi.h
 create mode 100644 arch/x86/include/asm/smm.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f38e9ba..f89ee5c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -361,6 +361,31 @@ config FSP_TEMP_RAM_ADDR
 	  Stack top address which is used in FspInit after DRAM is ready and
 	  CAR is disabled.
 
+config MAX_CPUS
+        int "Maximum number of CPUs permitted"
+        default 4
+        help
+          When using multi-CPU chips it is possible for U-Boot to start up
+          more than one CPU. The stack memory used by all of these CPUs is
+          pre-allocated so at present U-Boot wants to know the maximum
+          number of CPUs that may be present. Set this to at least as high
+          as the number of CPUs in your system (it uses about 4KB of RAM for
+          each CPU).
+
+config SMP
+	bool "Enable Symmetric Multiprocessing"
+	default n
+	help
+	  Enable use of more than one CPU in U-Boot and the Operating System
+	  when loaded. Each CPU will be started up and information can be
+	  obtained using the 'cpu' command. If this option is disabled, then
+	  only one CPU will be enabled regardless of the number of CPUs
+	  available.
+
+config STACK_SIZE
+	hex
+	default 0x1000
+
 config TSC_CALIBRATION_BYPASS
 	bool "Bypass Time-Stamp Counter (TSC) calibration"
 	default n
diff --git a/arch/x86/cpu/Makefile b/arch/x86/cpu/Makefile
index 6ded0a7..9a08ab4 100644
--- a/arch/x86/cpu/Makefile
+++ b/arch/x86/cpu/Makefile
@@ -19,6 +19,8 @@ obj-$(CONFIG_NORTHBRIDGE_INTEL_IVYBRIDGE) += ivybridge/
 obj-$(CONFIG_INTEL_QUARK) += quark/
 obj-$(CONFIG_INTEL_QUEENSBAY) += queensbay/
 obj-y += lapic.o
+obj-$(CONFIG_SMP) += mp_init.o
 obj-y += mtrr.o
 obj-$(CONFIG_PCI) += pci.o
+obj-$(CONFIG_SMP) += sipi.o
 obj-y += turbo.o
diff --git a/arch/x86/cpu/ivybridge/model_206ax.c b/arch/x86/cpu/ivybridge/model_206ax.c
index 11dc625..8b08c40 100644
--- a/arch/x86/cpu/ivybridge/model_206ax.c
+++ b/arch/x86/cpu/ivybridge/model_206ax.c
@@ -435,8 +435,8 @@ static int intel_cores_init(struct x86_cpu_priv *cpu)
 
 		debug("CPU: %u has core %u\n", cpu->apic_id, new_cpu->apic_id);
 
-#if CONFIG_SMP && CONFIG_MAX_CPUS > 1
-		/* Start the new cpu */
+#if 0 && CONFIG_SMP && CONFIG_MAX_CPUS > 1
+		/* TODO(sjg at chromium.org): Start the new cpu */
 		if (!start_cpu(new_cpu)) {
 			/* Record the error in cpu? */
 			printk(BIOS_ERR, "CPU %u would not start!\n",
diff --git a/arch/x86/cpu/mp_init.c b/arch/x86/cpu/mp_init.c
new file mode 100644
index 0000000..c1fd4e0
--- /dev/null
+++ b/arch/x86/cpu/mp_init.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright (C) 2015 Google, Inc
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ *
+ * Based on code from the coreboot file of the same name
+ */
+
+#include <common.h>
+#include <cpu.h>
+#include <dm.h>
+#include <errno.h>
+#include <malloc.h>
+#include <asm/atomic.h>
+#include <asm/cpu.h>
+#include <asm/interrupt.h>
+#include <asm/lapic.h>
+#include <asm/mp.h>
+#include <asm/mtrr.h>
+#include <asm/sipi.h>
+#include <asm/smm.h>
+#include <dm/device-internal.h>
+#include <dm/uclass-internal.h>
+#include <linux/linkage.h>
+
+/* This also needs to match the sipi.S assembly code for saved MSR encoding */
+struct saved_msr {
+	uint32_t index;
+	uint32_t lo;
+	uint32_t hi;
+} __packed;
+
+
+/*
+ * The SIPI vector is loaded at the SMM_DEFAULT_BASE. The reason is at the
+ * memory range is already reserved so the OS cannot use it. That region is
+ * free to use for AP bringup before SMM is initialised.
+ */
+static const uint32_t sipi_vector_location = SMM_DEFAULT_BASE;
+static const int sipi_vector_location_size = SMM_DEFAULT_SIZE;
+
+struct mp_flight_plan {
+	int num_records;
+	struct mp_flight_record *records;
+};
+
+static struct mp_flight_plan mp_info;
+
+struct cpu_map {
+	struct udevice *dev;
+	int apic_id;
+	int err_code;
+};
+
+static inline void barrier_wait(atomic_t *b)
+{
+	while (atomic_read(b) == 0)
+		asm("pause");
+	mfence();
+}
+
+static inline void release_barrier(atomic_t *b)
+{
+	mfence();
+	atomic_set(b, 1);
+}
+
+/* Returns 1 if timeout waiting for APs. 0 if target APs found */
+static int wait_for_aps(atomic_t *val, int target, int total_delay,
+			int delay_step)
+{
+	int timeout = 0;
+	int delayed = 0;
+
+	while (atomic_read(val) != target) {
+		udelay(delay_step);
+		delayed += delay_step;
+		if (delayed >= total_delay) {
+			timeout = 1;
+			break;
+		}
+	}
+
+	return timeout;
+}
+
+static void ap_do_flight_plan(struct udevice *cpu)
+{
+	int i;
+
+	for (i = 0; i < mp_info.num_records; i++) {
+		struct mp_flight_record *rec = &mp_info.records[i];
+
+		atomic_inc(&rec->cpus_entered);
+		barrier_wait(&rec->barrier);
+
+		if (rec->ap_call != NULL)
+			rec->ap_call(cpu, rec->ap_arg);
+	}
+}
+
+static int find_cpu_by_apid_id(int apic_id, struct udevice **devp)
+{
+	struct udevice *dev;
+
+	*devp = NULL;
+	for (uclass_find_first_device(UCLASS_CPU, &dev);
+	     dev;
+	     uclass_find_next_device(&dev)) {
+		struct cpu_platdata *plat = dev_get_parent_platdata(dev);
+
+		if (plat->cpu_id == apic_id) {
+			*devp = dev;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * By the time APs call ap_init() caching has been setup, and microcode has
+ * been loaded
+ */
+static void asmlinkage ap_init(unsigned int cpu_index)
+{
+	struct udevice *dev;
+	int apic_id;
+	int ret;
+
+	/* Ensure the local apic is enabled */
+	enable_lapic();
+
+	apic_id = lapicid();
+	ret = find_cpu_by_apid_id(apic_id, &dev);
+	if (ret) {
+		debug("Unknown CPU apic_id %x\n", apic_id);
+		goto done;
+	}
+
+	debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id,
+	      dev ? dev->name : "(apic_id not found)");
+
+	/* Walk the flight plan */
+	ap_do_flight_plan(dev);
+
+	/* Park the AP */
+	debug("parking\n");
+done:
+	stop_this_cpu();
+}
+
+#define NUM_FIXED_MTRRS 11
+
+static const unsigned int fixed_mtrrs[NUM_FIXED_MTRRS] = {
+	MTRR_FIX_64K_00000_MSR, MTRR_FIX_16K_80000_MSR, MTRR_FIX_16K_A0000_MSR,
+	MTRR_FIX_4K_C0000_MSR, MTRR_FIX_4K_C8000_MSR, MTRR_FIX_4K_D0000_MSR,
+	MTRR_FIX_4K_D8000_MSR, MTRR_FIX_4K_E0000_MSR, MTRR_FIX_4K_E8000_MSR,
+	MTRR_FIX_4K_F0000_MSR, MTRR_FIX_4K_F8000_MSR,
+};
+
+static inline struct saved_msr *save_msr(int index, struct saved_msr *entry)
+{
+	msr_t msr;
+
+	msr = msr_read(index);
+	entry->index = index;
+	entry->lo = msr.lo;
+	entry->hi = msr.hi;
+
+	/* Return the next entry */
+	entry++;
+	return entry;
+}
+
+static int save_bsp_msrs(char *start, int size)
+{
+	int msr_count;
+	int num_var_mtrrs;
+	struct saved_msr *msr_entry;
+	int i;
+	msr_t msr;
+
+	/* Determine number of MTRRs need to be saved */
+	msr = msr_read(MTRR_CAP_MSR);
+	num_var_mtrrs = msr.lo & 0xff;
+
+	/* 2 * num_var_mtrrs for base and mask. +1 for IA32_MTRR_DEF_TYPE */
+	msr_count = 2 * num_var_mtrrs + NUM_FIXED_MTRRS + 1;
+
+	if ((msr_count * sizeof(struct saved_msr)) > size) {
+		printf("Cannot mirror all %d msrs.\n", msr_count);
+		return -ENOSPC;
+	}
+
+	msr_entry = (void *)start;
+	for (i = 0; i < NUM_FIXED_MTRRS; i++)
+		msr_entry = save_msr(fixed_mtrrs[i], msr_entry);
+
+	for (i = 0; i < num_var_mtrrs; i++) {
+		msr_entry = save_msr(MTRR_PHYS_BASE_MSR(i), msr_entry);
+		msr_entry = save_msr(MTRR_PHYS_MASK_MSR(i), msr_entry);
+	}
+
+	msr_entry = save_msr(MTRR_DEF_TYPE_MSR, msr_entry);
+
+	return msr_count;
+}
+
+static int load_sipi_vector(atomic_t **ap_countp)
+{
+	struct sipi_params_16bit *params16;
+	struct sipi_params *params;
+	static char msr_save[512];
+	char *stack;
+	ulong addr;
+	int code_len;
+	int size;
+	int ret;
+
+	/* Copy in the code */
+	code_len = ap_start16_code_end - ap_start16;
+	debug("Copying SIPI code to %x: %d bytes\n", SMM_DEFAULT_BASE,
+	      code_len);
+	memcpy((void *)SMM_DEFAULT_BASE, ap_start16, code_len);
+
+	addr = SMM_DEFAULT_BASE + (ulong)sipi_params_16bit - (ulong)ap_start16;
+	params16 = (struct sipi_params_16bit *)addr;
+	params16->ap_start = (uint32_t)ap_start;
+	params16->gdt = (uint32_t)gd->arch.gdt;
+	params16->gdt_limit = X86_GDT_SIZE - 1;
+	debug("gdt = %x, gdt_limit = %x\n", params16->gdt, params16->gdt_limit);
+
+	params = (struct sipi_params *)sipi_params;
+	debug("SIPI 32-bit params at %p\n", params);
+	params->idt_ptr = (uint32_t)x86_get_idt();
+
+	params->stack_size = 4096;
+	size = params->stack_size * CONFIG_MAX_CPUS;
+	stack = memalign(size, 4096);
+	if (!stack)
+		return -ENOMEM;
+	params->stack_top = (u32)(stack + size);
+
+	params->microcode_ptr = 0;
+	params->msr_table_ptr = (u32)msr_save;
+	ret = save_bsp_msrs(msr_save, sizeof(msr_save));
+	if (ret < 0)
+		return ret;
+	params->msr_count = ret;
+
+	params->c_handler = (uint32_t)&ap_init;
+
+	*ap_countp = &params->ap_count;
+	atomic_set(*ap_countp, 0);
+	debug("SIPI vector is ready\n");
+
+	return 0;
+}
+
+static int check_cpu_devices(int expected_cpus)
+{
+	int i;
+
+	for (i = 0; i < expected_cpus; i++) {
+		struct udevice *dev;
+		int ret;
+
+		ret = uclass_find_device(UCLASS_CPU, i, &dev);
+		if (ret) {
+			debug("Cannot find CPU %d in device tree\n", i);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/* Returns 1 for timeout. 0 on success */
+static int apic_wait_timeout(int total_delay, int delay_step)
+{
+	int total = 0;
+	int timeout = 0;
+
+	while (lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY) {
+		udelay(delay_step);
+		total += delay_step;
+		if (total >= total_delay) {
+			timeout = 1;
+			break;
+		}
+	}
+
+	return timeout;
+}
+
+static int start_aps(int ap_count, atomic_t *num_aps)
+{
+	int sipi_vector;
+	/* Max location is 4KiB below 1MiB */
+	const int max_vector_loc = ((1 << 20) - (1 << 12)) >> 12;
+
+	if (ap_count == 0)
+		return 0;
+
+	/* The vector is sent as a 4k aligned address in one byte */
+	sipi_vector = sipi_vector_location >> 12;
+
+	if (sipi_vector > max_vector_loc) {
+		printf("SIPI vector too large! 0x%08x\n",
+		       sipi_vector);
+		return -1;
+	}
+
+	debug("Attempting to start %d APs\n", ap_count);
+
+	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
+		debug("Waiting for ICR not to be busy...");
+		if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
+			debug("timed out. Aborting.\n");
+			return -1;
+		} else {
+			debug("done.\n");
+		}
+	}
+
+	/* Send INIT IPI to all but self */
+	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
+	lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
+			   LAPIC_DM_INIT);
+	debug("Waiting for 10ms after sending INIT.\n");
+	mdelay(10);
+
+	/* Send 1st SIPI */
+	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
+		debug("Waiting for ICR not to be busy...");
+		if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
+			debug("timed out. Aborting.\n");
+			return -1;
+		} else {
+			debug("done.\n");
+		}
+	}
+
+	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
+	lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
+			   LAPIC_DM_STARTUP | sipi_vector);
+	debug("Waiting for 1st SIPI to complete...");
+	if (apic_wait_timeout(10000 /* 10 ms */, 50 /* us */)) {
+		debug("timed out.\n");
+		return -1;
+	} else {
+		debug("done.\n");
+	}
+
+	/* Wait for CPUs to check in up to 200 us */
+	wait_for_aps(num_aps, ap_count, 200 /* us */, 15 /* us */);
+
+	/* Send 2nd SIPI */
+	if ((lapic_read(LAPIC_ICR) & LAPIC_ICR_BUSY)) {
+		debug("Waiting for ICR not to be busy...");
+		if (apic_wait_timeout(1000 /* 1 ms */, 50)) {
+			debug("timed out. Aborting.\n");
+			return -1;
+		} else {
+			debug("done.\n");
+		}
+	}
+
+	lapic_write_around(LAPIC_ICR2, SET_LAPIC_DEST_FIELD(0));
+	lapic_write_around(LAPIC_ICR, LAPIC_DEST_ALLBUT | LAPIC_INT_ASSERT |
+			   LAPIC_DM_STARTUP | sipi_vector);
+	debug("Waiting for 2nd SIPI to complete...");
+	if (apic_wait_timeout(10000 /* 10 ms */, 50 /* us */)) {
+		debug("timed out.\n");
+		return -1;
+	} else {
+		debug("done.\n");
+	}
+
+	/* Wait for CPUs to check in */
+	if (wait_for_aps(num_aps, ap_count, 10000 /* 10 ms */, 50 /* us */)) {
+		debug("Not all APs checked in: %d/%d.\n",
+		      atomic_read(num_aps), ap_count);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int bsp_do_flight_plan(struct udevice *cpu, struct mp_params *mp_params)
+{
+	int i;
+	int ret = 0;
+	const int timeout_us = 100000;
+	const int step_us = 100;
+	int num_aps = mp_params->num_cpus - 1;
+
+	for (i = 0; i < mp_params->num_records; i++) {
+		struct mp_flight_record *rec = &mp_params->flight_plan[i];
+
+		/* Wait for APs if the record is not released */
+		if (atomic_read(&rec->barrier) == 0) {
+			/* Wait for the APs to check in */
+			if (wait_for_aps(&rec->cpus_entered, num_aps,
+					 timeout_us, step_us)) {
+				debug("MP record %d timeout.\n", i);
+				ret = -1;
+			}
+		}
+
+		if (rec->bsp_call != NULL)
+			rec->bsp_call(cpu, rec->bsp_arg);
+
+		release_barrier(&rec->barrier);
+	}
+	return ret;
+}
+
+static int init_bsp(struct udevice **devp)
+{
+	char processor_name[CPU_MAX_NAME_LEN];
+	int apic_id;
+	int ret;
+
+	cpu_get_name(processor_name);
+	debug("CPU: %s.\n", processor_name);
+
+	enable_lapic();
+
+	apic_id = lapicid();
+	ret = find_cpu_by_apid_id(apic_id, devp);
+	if (ret) {
+		printf("Cannot find boot CPU, APIC ID %d\n", apic_id);
+		return ret;
+	}
+
+	return 0;
+}
+
+int mp_init(struct mp_params *p)
+{
+	int num_aps;
+	atomic_t *ap_count;
+	struct udevice *cpu;
+	int ret;
+
+	/* This will cause the CPUs devices to be bound */
+	struct uclass *uc;
+	ret = uclass_get(UCLASS_CPU, &uc);
+	if (ret)
+		return ret;
+
+	ret = init_bsp(&cpu);
+	if (ret) {
+		debug("Cannot init boot CPU: err=%d\n", ret);
+		return ret;
+	}
+
+	if (p == NULL || p->flight_plan == NULL || p->num_records < 1) {
+		printf("Invalid MP parameters\n");
+		return -1;
+	}
+
+	ret = check_cpu_devices(p->num_cpus);
+	if (ret)
+		debug("Warning: Device tree does not describe all CPUs. Extra ones will not be started correctly\n");
+
+	/* Copy needed parameters so that APs have a reference to the plan */
+	mp_info.num_records = p->num_records;
+	mp_info.records = p->flight_plan;
+
+	/* Load the SIPI vector */
+	ret = load_sipi_vector(&ap_count);
+	if (ap_count == NULL)
+		return -1;
+
+	/*
+	 * Make sure SIPI data hits RAM so the APs that come up will see
+	 * the startup code even if the caches are disabled
+	 */
+	wbinvd();
+
+	/* Start the APs providing number of APs and the cpus_entered field */
+	num_aps = p->num_cpus - 1;
+	ret = start_aps(num_aps, ap_count);
+	if (ret) {
+		mdelay(1000);
+		debug("%d/%d eventually checked in?\n", atomic_read(ap_count),
+		      num_aps);
+		return ret;
+	}
+
+	/* Walk the flight plan for the BSP */
+	ret = bsp_do_flight_plan(cpu, p);
+	if (ret) {
+		debug("CPU init failed: err=%d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+int mp_init_cpu(struct udevice *cpu, void *unused)
+{
+	return device_probe(cpu);
+}
diff --git a/arch/x86/cpu/sipi.S b/arch/x86/cpu/sipi.S
new file mode 100644
index 0000000..09fde21
--- /dev/null
+++ b/arch/x86/cpu/sipi.S
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 2015 Google, Inc
+ *
+ * SPDX-License-Identifier:	GPL-2.0
+ *
+ * Taken from coreboot file of the same name
+ */
+
+/*
+ * The SIPI vector is responsible for initializing the APs in the sytem. It
+ * loads microcode, sets up MSRs, and enables caching before calling into
+ * C code
+ */
+
+#include <asm/global_data.h>
+#include <asm/msr-index.h>
+#include <asm/processor.h>
+#include <asm/processor-flags.h>
+#include <asm/smm.h>
+
+#define CODE_SEG	(X86_GDT_ENTRY_32BIT_CS * X86_GDT_ENTRY_SIZE)
+#define DATA_SEG	(X86_GDT_ENTRY_32BIT_DS * X86_GDT_ENTRY_SIZE)
+
+#define o32		.byte 0x66;
+
+/*
+ * First we have the 16-bit section. Every AP process starts here.
+ * The simple task is to load U-Boot's Global Descriptor Table (GDT) to allow
+ * U-Boot's 32-bit code to become visible, then jump to ap_start32.
+ *
+ * Note that this code is copied to RAM below 1MB in mp_init.c, and runs from
+ * there, but the 32-bit code (ap_start32 and onwards) is part of U-Boot and
+ * is therefore relocated to the top of RAM with other U-Boot code. This
+ * means that for the 16-bit code we must write relocatable code, but for the
+ * rest, we can do what we like.
+ */
+.text
+.code16
+.globl ap_start16
+ap_start16:
+	cli
+	xorl	%eax, %eax
+	movl	%eax, %cr3		/* Invalidate TLB */
+
+	/* setup the data segment */
+	movw	%cs, %ax
+	movw	%ax, %ds
+
+	/* Use an address relative to the data segment for the GDT */
+	movl	$gdtaddr, %ebx
+	subl	$ap_start16, %ebx
+
+	data32 lgdt (%ebx)
+
+	movl	%cr0, %eax
+	andl	$0x7ffaffd1, %eax	/* PG, AM, WP, NE, TS, EM, MP = 0 */
+	orl	$0x60000001, %eax	/* CD, NW, PE = 1 */
+	movl	%eax, %cr0
+
+	movl	$ap_start_jmp, %eax
+	subl	$ap_start16, %eax
+	movw	%ax, %bp
+
+	/* Jump to ap_start32 within U-Boot */
+o32 cs	ljmp	*(%bp)
+
+	.align	4
+.globl sipi_params_16bit
+sipi_params_16bit:
+	/* 48-bit far pointer */
+ap_start_jmp:
+	.long	0		/* offset set to ap_start by U-Boot */
+	.word	CODE_SEG	/* segment */
+
+	.word	0		/* padding */
+gdtaddr:
+	.word	0 /* limit */
+	.long	0 /* table */
+	.word	0 /* unused */
+
+.globl ap_start16_code_end
+ap_start16_code_end:
+
+/*
+ * Set up the special 'fs' segment for global_data. Then jump to ap_continue
+ * to set up the AP.
+ */
+.globl ap_start
+ap_start:
+	.code32
+	movw	$DATA_SEG, %ax
+	movw	%ax, %ds
+	movw	%ax, %es
+	movw	%ax, %ss
+	movw	%ax, %gs
+
+	movw	$(X86_GDT_ENTRY_32BIT_FS * X86_GDT_ENTRY_SIZE), %ax
+	movw	%ax, %fs
+
+	/* Load the Interrupt descriptor table */
+	mov	idt_ptr, %ebx
+	lidt	(%ebx)
+
+	/* Obtain cpu number */
+	movl	ap_count, %eax
+1:
+	movl	%eax, %ecx
+	inc	%ecx
+	lock cmpxchg %ecx, ap_count
+	jnz	1b
+
+	/* Setup stacks for each CPU */
+	movl	stack_size, %eax
+	mul	%ecx
+	movl	stack_top, %edx
+	subl	%eax, %edx
+	mov	%edx, %esp
+	/* Save cpu number */
+	mov	%ecx, %esi
+
+	/* Determine if one should check microcode versions */
+	mov	microcode_ptr, %edi
+	test	%edi, %edi
+	jz	microcode_done /* Bypass if no microde exists */
+
+	/* Get the Microcode version */
+	mov	$1, %eax
+	cpuid
+	mov	$MSR_IA32_UCODE_REV, %ecx
+	rdmsr
+	/* If something already loaded skip loading again */
+	test	%edx, %edx
+	jnz	microcode_done
+
+	/* Determine if parallel microcode loading is allowed */
+	cmp	$0xffffffff, microcode_lock
+	je	load_microcode
+
+	/* Protect microcode loading */
+lock_microcode:
+	lock bts $0, microcode_lock
+	jc	lock_microcode
+
+load_microcode:
+	/* Load new microcode */
+	mov	$MSR_IA32_UCODE_WRITE, %ecx
+	xor	%edx, %edx
+	mov	%edi, %eax
+	/* The microcode pointer is passed in pointing to the header. Adjust
+	 * pointer to reflect the payload (header size is 48 bytes) */
+	add	$48, %eax
+	pusha
+	wrmsr
+	popa
+
+	/* Unconditionally unlock microcode loading */
+	cmp	$0xffffffff, microcode_lock
+	je	microcode_done
+
+	xor	%eax, %eax
+	mov	%eax, microcode_lock
+
+microcode_done:
+	/*
+	 * Load MSRs. Each entry in the table consists of:
+	 * 0: index,
+	 * 4: value[31:0]
+	 * 8: value[63:32]
+	 * See struct saved_msr in mp_init.c.
+	 */
+	mov	msr_table_ptr, %edi
+	mov	msr_count, %ebx
+	test	%ebx, %ebx
+	jz	1f
+load_msr:
+	mov	(%edi), %ecx
+	mov	4(%edi), %eax
+	mov	8(%edi), %edx
+	wrmsr
+	add	$12, %edi
+	dec	%ebx
+	jnz	load_msr
+
+1:
+	/* Enable caching */
+	mov	%cr0, %eax
+	and	$0x9fffffff, %eax /* CD, NW = 0 */
+	mov	%eax, %cr0
+
+	/* c_handler(cpu_num) */
+	push	%esi	/* cpu_num */
+	mov	c_handler, %eax
+	call	*%eax
+
+	.align	4
+.globl	sipi_params
+sipi_params:
+idt_ptr:
+	.long 0
+stack_top:
+	.long 0
+stack_size:
+	.long 0
+microcode_lock:
+	.long 0
+microcode_ptr:
+	.long 0
+msr_table_ptr:
+	.long 0
+msr_count:
+	.long 0
+c_handler:
+	.long 0
+ap_count:
+	.long 0
diff --git a/arch/x86/include/asm/mp.h b/arch/x86/include/asm/mp.h
new file mode 100644
index 0000000..5dc0e33
--- /dev/null
+++ b/arch/x86/include/asm/mp.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2015 Google, Inc
+ *
+ * SPDX-License-Identifier:	GPL-2.0
+ *
+ * Taken from coreboot file of the same name
+ */
+
+#ifndef _X86_MP_H_
+#define _X86_MP_H_
+
+#include <asm/atomic.h>
+
+typedef int (*mp_callback_t)(struct udevice *cpu, void *arg);
+
+/*
+ * A mp_flight_record details a sequence of calls for the APs to perform
+ * along with the BSP to coordinate sequencing. Each flight record either
+ * provides a barrier for each AP before calling the callback or the APs
+ * are allowed to perform the callback without waiting. Regardless, each
+ * record has the cpus_entered field incremented for each record. When
+ * the BSP observes that the cpus_entered matches the number of APs
+ * the bsp_call is called with bsp_arg and upon returning releases the
+ * barrier allowing the APs to make further progress.
+ *
+ * Note that ap_call() and bsp_call() can be NULL. In the NULL case the
+ * callback will just not be called.
+ */
+struct mp_flight_record {
+	atomic_t barrier;
+	atomic_t cpus_entered;
+	mp_callback_t ap_call;
+	void *ap_arg;
+	mp_callback_t bsp_call;
+	void *bsp_arg;
+} __attribute__((aligned(ARCH_DMA_MINALIGN)));
+
+#define _MP_FLIGHT_RECORD(barrier_, ap_func_, ap_arg_, bsp_func_, bsp_arg_) \
+	{							\
+		.barrier = ATOMIC_INIT(barrier_),		\
+		.cpus_entered = ATOMIC_INIT(0),			\
+		.ap_call = ap_func_,				\
+		.ap_arg = ap_arg_,				\
+		.bsp_call = bsp_func_,				\
+		.bsp_arg = bsp_arg_,				\
+	}
+
+#define MP_FR_BLOCK_APS(ap_func_, ap_arg_, bsp_func_, bsp_arg_) \
+	_MP_FLIGHT_RECORD(0, ap_func_, ap_arg_, bsp_func_, bsp_arg_)
+
+#define MP_FR_NOBLOCK_APS(ap_func_, ap_arg_, bsp_func_, bsp_arg_) \
+	_MP_FLIGHT_RECORD(1, ap_func_, ap_arg_, bsp_func_, bsp_arg_)
+
+/*
+ * The mp_params structure provides the arguments to the mp subsystem
+ * for bringing up APs.
+ *
+ * At present this is overkill for U-Boot, but it may make it easier to add
+ * SMM support.
+ */
+struct mp_params {
+	int num_cpus; /* Total cpus include BSP */
+	int parallel_microcode_load;
+	const void *microcode_pointer;
+	/* Flight plan  for APs and BSP */
+	struct mp_flight_record *flight_plan;
+	int num_records;
+};
+
+/*
+ * mp_init() will set up the SIPI vector and bring up the APs according to
+ * mp_params. Each flight record will be executed according to the plan. Note
+ * that the MP infrastructure uses SMM default area without saving it. It's
+ * up to the chipset or mainboard to either e820 reserve this area or save this
+ * region prior to calling mp_init() and restoring it after mp_init returns.
+ *
+ * At the time mp_init() is called the MTRR MSRs are mirrored into APs then
+ * caching is enabled before running the flight plan.
+ *
+ * The MP init has the following properties:
+ * 1. APs are brought up in parallel.
+ * 2. The ordering of coreboot cpu number and APIC ids is not deterministic.
+ *    Therefore, one cannot rely on this property or the order of devices in
+ *    the device tree unless the chipset or mainboard know the APIC ids
+ *    a priori.
+ *
+ * mp_init() returns < 0 on error, 0 on success.
+ */
+int mp_init(struct mp_params *params);
+
+/* Probes the CPU device */
+int mp_init_cpu(struct udevice *cpu, void *unused);
+
+#endif /* _X86_MP_H_ */
diff --git a/arch/x86/include/asm/sipi.h b/arch/x86/include/asm/sipi.h
new file mode 100644
index 0000000..bb2f0de
--- /dev/null
+++ b/arch/x86/include/asm/sipi.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2015 Gooogle, Inc
+ * Written by Simon Glass <sjg at chromium.org>
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#ifndef _ASM_SIPI_H
+#define _ASM_SIPI_H
+
+/**
+ * struct sipi_params_16bit - 16-bit SIPI entry-point parameters
+ *
+ * These are set up in the same space as the SIPI 16-bit code so that each AP
+ * can access the parameters when it boots.
+ *
+ * Each of these must be set up for the AP to boot, except @segment which is
+ * set in the assembly code.
+ *
+ * @ap_start:		32-bit SIPI entry point for U-Boot
+ * @segment:		Code segment for U-Boot
+ * @pad:		Padding (not used)
+ * @gdt_limit:		U-Boot GDT limit (X86_GDT_SIZE - 1)
+ * @gdt:		U-Boot GDT (gd->arch.gdt)
+ * @unused:		Not used
+ */
+struct __packed sipi_params_16bit {
+	u32 ap_start;
+	u16 segment;
+	u16 pad;
+
+	u16 gdt_limit;
+	u32 gdt;
+	u16 unused;
+};
+
+/**
+ * struct sipi_params - 32-bit SIP entry-point parameters
+ *
+ * These are used by the AP init code and must be set up before the APs start.
+ *
+ * The stack area extends down from @stack_top, with @stack_size allocated
+ * for each AP.
+ *
+ * @idt_ptr:		Interrupt descriptor table pointer
+ * @stack_top:		Top of the AP stack area
+ * @stack_size:		Size of each AP's stack
+ * @microcode_lock:	Used to ensure only one AP loads microcode at once
+ * @microcode_ptr:	Pointer to microcode, or 0 if none
+ * @msr_table_ptr:	Pointer to saved MSRs, a list of struct saved_msr
+ * @msr_count:		Number of saved MSRs
+ * @c_handler:		C function to call once early init is complete
+ * @ap_count:		Shared atomic value to allocate CPU indexes
+ */
+struct sipi_params {
+	u32 idt_ptr;
+	u32 stack_top;
+	u32 stack_size;
+	u32 microcode_lock;
+	u32 microcode_ptr;
+	u32 msr_table_ptr;
+	u32 msr_count;
+	u32 c_handler;
+	atomic_t ap_count;
+};
+
+/* 16-bit AP entry point */
+void ap_start16(void);
+
+/* end of 16-bit code/data, marks the region to be copied to SIP vector */
+void ap_start16_code_end(void);
+
+/* 32-bit AP entry point */
+void ap_start(void);
+
+extern char sipi_params_16bit[];
+extern char sipi_params[];
+
+#endif
diff --git a/arch/x86/include/asm/smm.h b/arch/x86/include/asm/smm.h
new file mode 100644
index 0000000..79b4a8e
--- /dev/null
+++ b/arch/x86/include/asm/smm.h
@@ -0,0 +1,14 @@
+/*
+ * Copyright (c) 2015 Google, Inc
+ * Copyright (C) 2008-2009 coresystems GmbH
+ *
+ * SPDX-License-Identifier:	GPL-2.0
+ */
+
+#ifndef CPU_X86_SMM_H
+#define CPU_X86_SMM_H
+
+#define SMM_DEFAULT_BASE 0x30000
+#define SMM_DEFAULT_SIZE 0x10000
+
+#endif
-- 
2.2.0.rc0.207.ga3a616c



More information about the U-Boot mailing list