diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 27295ef384..ff4f06ed79 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -603,6 +603,13 @@ config SMP only one CPU will be enabled regardless of the number of CPUs available. +config SMP_AP_WORK + bool + depends on SMP + help + Allow APs to do other work after initialisation instead of going + to sleep. + config MAX_CPUS int "Maximum number of CPUs permitted" depends on SMP diff --git a/arch/x86/cpu/Makefile b/arch/x86/cpu/Makefile index ee0499f5d7..16e67e3da2 100644 --- a/arch/x86/cpu/Makefile +++ b/arch/x86/cpu/Makefile @@ -60,7 +60,7 @@ ifndef CONFIG_SYS_COREBOOT obj-$(CONFIG_$(SPL_TPL_)X86_32BIT_INIT) += irq.o endif ifndef CONFIG_$(SPL_)X86_64 -obj-$(CONFIG_SMP) += mp_init.o +obj-$(CONFIG_$(SPL_)SMP) += mp_init.o endif obj-y += mtrr.o obj-$(CONFIG_PCI) += pci.o diff --git a/arch/x86/cpu/apollolake/Kconfig b/arch/x86/cpu/apollolake/Kconfig index 942f11f566..99d4e105c2 100644 --- a/arch/x86/cpu/apollolake/Kconfig +++ b/arch/x86/cpu/apollolake/Kconfig @@ -15,6 +15,7 @@ config INTEL_APOLLOLAKE select TPL_PCH_SUPPORT select PCH_SUPPORT select P2SB + select SMP_AP_WORK imply ENABLE_MRC_CACHE imply AHCI_PCI imply SCSI diff --git a/arch/x86/cpu/cpu.c b/arch/x86/cpu/cpu.c index 98ed66e67d..69c14189d1 100644 --- a/arch/x86/cpu/cpu.c +++ b/arch/x86/cpu/cpu.c @@ -67,10 +67,13 @@ static const char *const x86_vendor_name[] = { int __weak x86_cleanup_before_linux(void) { -#ifdef CONFIG_BOOTSTAGE_STASH + int ret; + + ret = mp_park_aps(); + if (ret) + return log_msg_ret("park", ret); bootstage_stash((void *)CONFIG_BOOTSTAGE_STASH_ADDR, CONFIG_BOOTSTAGE_STASH_SIZE); -#endif return 0; } @@ -201,18 +204,19 @@ int last_stage_init(void) write_tables(); -#ifdef CONFIG_GENERATE_ACPI_TABLE - fadt = acpi_find_fadt(); + if (IS_ENABLED(CONFIG_GENERATE_ACPI_TABLE)) { + fadt = acpi_find_fadt(); - /* Don't touch ACPI hardware on HW reduced platforms */ - if (fadt && !(fadt->flags & ACPI_FADT_HW_REDUCED_ACPI)) { - /* - * Other than 
waiting for OSPM to request us to switch to ACPI - * mode, do it by ourselves, since SMI will not be triggered. - */ - enter_acpi_mode(fadt->pm1a_cnt_blk); + /* Don't touch ACPI hardware on HW reduced platforms */ + if (fadt && !(fadt->flags & ACPI_FADT_HW_REDUCED_ACPI)) { + /* + * Other than waiting for OSPM to request us to switch + * to ACPI mode, do it by ourselves, since SMI will + * not be triggered. + */ + enter_acpi_mode(fadt->pm1a_cnt_blk); + } } -#endif return 0; } @@ -220,19 +224,20 @@ int last_stage_init(void) static int x86_init_cpus(void) { -#ifdef CONFIG_SMP - debug("Init additional CPUs\n"); - x86_mp_init(); -#else - struct udevice *dev; + if (IS_ENABLED(CONFIG_SMP)) { + debug("Init additional CPUs\n"); + x86_mp_init(); + } else { + struct udevice *dev; - /* - * This causes the cpu-x86 driver to be probed. - * We don't check return value here as we want to allow boards - * which have not been converted to use cpu uclass driver to boot. - */ - uclass_first_device(UCLASS_CPU, &dev); -#endif + /* + * This causes the cpu-x86 driver to be probed. + * We don't check return value here as we want to allow boards + * which have not been converted to use cpu uclass driver to + * boot. 
+ */ + uclass_first_device(UCLASS_CPU, &dev); + } return 0; } @@ -276,9 +281,8 @@ int reserve_arch(void) if (IS_ENABLED(CONFIG_ENABLE_MRC_CACHE)) mrccache_reserve(); -#ifdef CONFIG_SEABIOS - high_table_reserve(); -#endif + if (IS_ENABLED(CONFIG_SEABIOS)) + high_table_reserve(); if (IS_ENABLED(CONFIG_HAVE_ACPI_RESUME)) { acpi_s3_reserve(); diff --git a/arch/x86/cpu/i386/cpu.c b/arch/x86/cpu/i386/cpu.c index a6a6afec8c..8f342dd06e 100644 --- a/arch/x86/cpu/i386/cpu.c +++ b/arch/x86/cpu/i386/cpu.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -630,32 +631,15 @@ int cpu_jump_to_64bit_uboot(ulong target) return -EFAULT; } -#ifdef CONFIG_SMP -static int enable_smis(struct udevice *cpu, void *unused) -{ - return 0; -} - -static struct mp_flight_record mp_steps[] = { - MP_FR_BLOCK_APS(mp_init_cpu, NULL, mp_init_cpu, NULL), - /* Wait for APs to finish initialization before proceeding */ - MP_FR_BLOCK_APS(NULL, NULL, enable_smis, NULL), -}; - int x86_mp_init(void) { - struct mp_params mp_params; + int ret; - mp_params.parallel_microcode_load = 0, - mp_params.flight_plan = &mp_steps[0]; - mp_params.num_records = ARRAY_SIZE(mp_steps); - mp_params.microcode_pointer = 0; - - if (mp_init(&mp_params)) { + ret = mp_init(); + if (ret) { printf("Warning: MP init failure\n"); - return -EIO; + return log_ret(ret); } return 0; } -#endif diff --git a/arch/x86/cpu/mp_init.c b/arch/x86/cpu/mp_init.c index 7fde4ff7e1..d2f1ee38cf 100644 --- a/arch/x86/cpu/mp_init.c +++ b/arch/x86/cpu/mp_init.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -31,29 +32,132 @@ DECLARE_GLOBAL_DATA_PTR; -/* Total CPUs include BSP */ -static int num_cpus; +/* + * Setting up multiprocessing + * + * See https://www.intel.com/content/www/us/en/intelligent-systems/intel-boot-loader-development-kit/minimal-intel-architecture-boot-loader-paper.html + * + * Note that this file refers to the boot CPU (the one U-Boot is running on) as + * the BSP 
(BootStrap Processor) and the others as APs (Application Processors). + * + * This module works by loading some setup code into RAM at AP_DEFAULT_BASE and + * telling each AP to execute it. The code that each AP runs is in + * sipi_vector.S (see ap_start16) which includes a struct sipi_params at the + * end of it. Those parameters are set up by the C code. + * + * Setting up is handled by load_sipi_vector(). It inits the common block of + * parameters (sipi_params) which tell the APs what to do. This block includes + * microcode and the MTRRs (Memory-Type-Range Registers) from the main CPU. + * There is also an ap_count which each AP increments as it starts up, so the + * BSP can tell how many checked in. + * + * The APs are started with a SIPI (Startup Inter-Processor Interrupt) which + * tells an AP to start executing at a particular address, in this case + * AP_DEFAULT_BASE which contains the code copied from ap_start16. This protocol + * is handled by start_aps(). + * + * After being started, each AP runs the code in ap_start16, switches to 32-bit + * mode, runs the code at ap_start, then jumps to c_handler which is ap_init(). + * This runs a very simple 'flight plan' described in mp_steps[]. This sets up + * the CPU and waits for further instructions by looking at its entry in + * ap_callbacks[]. Note that the flight plan is only actually run for each CPU + * in bsp_do_flight_plan(): once the BSP completes each flight record, it sets + * mp_flight_record->barrier to 1 to allow the APs to execute the record one + * by one. 
+ * + * CPUS are numbered sequentially from 0 using the device tree: + * + * cpus { + * u-boot,dm-pre-reloc; + * #address-cells = <1>; + * #size-cells = <0>; + * + * cpu@0 { + * u-boot,dm-pre-reloc; + * device_type = "cpu"; + * compatible = "intel,apl-cpu"; + * reg = <0>; + * intel,apic-id = <0>; + * }; + * + * cpu@1 { + * device_type = "cpu"; + * compatible = "intel,apl-cpu"; + * reg = <1>; + * intel,apic-id = <2>; + * }; + * + * Here the 'reg' property is the CPU number and then is placed in dev->req_seq + * so that we can index into ap_callbacks[] using that. The APIC ID is different + * and may not be sequential (it typically is if hyperthreading is supported). + * + * Once APs are inited they wait in ap_wait_for_instruction() for instructions. + * Instructions come in the form of a function to run. This logic is in + * mp_run_on_cpus() which supports running on any one AP, all APs, just the BSP + * or all CPUs. The BSP logic is handled directly in mp_run_on_cpus(), by + * calling the function. For the APs, callback information is stored in a + * single, common struct mp_callback and a pointer to this is written to each + * AP's slot in ap_callbacks[] by run_ap_work(). All APs get the message even + * if it is only for one of them. When an AP notices a message it checks whether + * it should call the function (see check in ap_wait_for_instruction()) and then + * does so if needed. After that it sets its slot to NULL to indicate it is + * done. + * + * While U-Boot is running it can use mp_run_on_cpus() to run code on the APs. + * An example of this is the 'mtrr' command which allows reading and changing + * the MTRRs on all CPUs. + * + * Before U-Boot exits it calls mp_park_aps() which tells all CPUs to halt by + * executing a 'hlt' instruction. That allows them to be used by Linux when it + * starts up. 
+ */ /* This also needs to match the sipi.S assembly code for saved MSR encoding */ -struct saved_msr { +struct __packed saved_msr { uint32_t index; uint32_t lo; uint32_t hi; -} __packed; - +}; +/** + * struct mp_flight_plan - Holds the flight plan + * + * @num_records: Number of flight records + * @records: Pointer to each record + */ struct mp_flight_plan { int num_records; struct mp_flight_record *records; }; +/** + * struct mp_callback - Callback information for APs + * + * @func: Function to run + * @arg: Argument to pass to the function + * @logical_cpu_number: Either a CPU number (i.e. dev->req_seq) or a special + * value like MP_SELECT_BSP. It tells the AP whether it should process this + * callback + */ +struct mp_callback { + mp_run_func func; + void *arg; + int logical_cpu_number; +}; + +/* Stores the flight plan so that APs can find it */ static struct mp_flight_plan mp_info; -struct cpu_map { - struct udevice *dev; - int apic_id; - int err_code; -}; +/* + * ap_callbacks - Callback mailbox array + * + * Array of callback, one entry for each available CPU, indexed by the CPU + * number, which is dev->req_seq. The entry for the main CPU is never used. + * When this is NULL, there is no pending work for the CPU to run. When + * non-NULL it points to the mp_callback structure. This is shared between all + * CPUs, so should only be written by the main CPU. + */ +static struct mp_callback **ap_callbacks; static inline void barrier_wait(atomic_t *b) { @@ -151,11 +255,12 @@ static void ap_init(unsigned int cpu_index) debug("AP: slot %d apic_id %x, dev %s\n", cpu_index, apic_id, dev ? 
dev->name : "(apic_id not found)"); - /* Walk the flight plan */ + /* + * Walk the flight plan, which only returns if CONFIG_SMP_AP_WORK is not + * enabled + */ ap_do_flight_plan(dev); - /* Park the AP */ - debug("parking\n"); done: stop_this_cpu(); } @@ -309,13 +414,26 @@ static int apic_wait_timeout(int total_delay, const char *msg) return 0; } -static int start_aps(int ap_count, atomic_t *num_aps) +/** + * start_aps() - Start up the APs and count how many we find + * + * This is called on the boot processor to start up all the other processors + * (here called APs). + * + * @num_aps: Number of APs we expect to find + * @ap_count: Initially zero. Incremented by this function for each AP found + * @return 0 if all APs were set up correctly or there are none to set up, + * -ENOSPC if the SIPI vector is too high in memory, + * -ETIMEDOUT if the ICR is busy or the second SIPI fails to complete + * -EIO if not all APs check in correctly + */ +static int start_aps(int num_aps, atomic_t *ap_count) { int sipi_vector; /* Max location is 4KiB below 1MiB */ const int max_vector_loc = ((1 << 20) - (1 << 12)) >> 12; - if (ap_count == 0) + if (num_aps == 0) return 0; /* The vector is sent as a 4k aligned address in one byte */ @@ -327,7 +445,7 @@ static int start_aps(int ap_count, atomic_t *num_aps) return -ENOSPC; } - debug("Attempting to start %d APs\n", ap_count); + debug("Attempting to start %d APs\n", num_aps); if (apic_wait_timeout(1000, "ICR not to be busy")) return -ETIMEDOUT; @@ -350,7 +468,7 @@ static int start_aps(int ap_count, atomic_t *num_aps) return -ETIMEDOUT; /* Wait for CPUs to check in up to 200 us */ - wait_for_aps(num_aps, ap_count, 200, 15); + wait_for_aps(ap_count, num_aps, 200, 15); /* Send 2nd SIPI */ if (apic_wait_timeout(1000, "ICR not to be busy")) @@ -363,25 +481,35 @@ static int start_aps(int ap_count, atomic_t *num_aps) return -ETIMEDOUT; /* Wait for CPUs to check in */ - if (wait_for_aps(num_aps, ap_count, 10000, 50)) { + if 
(wait_for_aps(ap_count, num_aps, 10000, 50)) { debug("Not all APs checked in: %d/%d\n", - atomic_read(num_aps), ap_count); + atomic_read(ap_count), num_aps); return -EIO; } return 0; } -static int bsp_do_flight_plan(struct udevice *cpu, struct mp_params *mp_params) +/** + * bsp_do_flight_plan() - Do the flight plan on the BSP + * + * This runs the flight plan on the main CPU used to boot U-Boot + * + * @cpu: Device for the main CPU + * @plan: Flight plan to run + * @num_aps: Number of APs (CPUs other than the BSP) + * @returns 0 on success, -ETIMEDOUT if an AP failed to come up + */ +static int bsp_do_flight_plan(struct udevice *cpu, struct mp_flight_plan *plan, + int num_aps) { int i; int ret = 0; const int timeout_us = 100000; const int step_us = 100; - int num_aps = num_cpus - 1; - for (i = 0; i < mp_params->num_records; i++) { - struct mp_flight_record *rec = &mp_params->flight_plan[i]; + for (i = 0; i < plan->num_records; i++) { + struct mp_flight_record *rec = &plan->records[i]; /* Wait for APs if the record is not released */ if (atomic_read(&rec->barrier) == 0) { @@ -398,12 +526,22 @@ static int bsp_do_flight_plan(struct udevice *cpu, struct mp_params *mp_params) release_barrier(&rec->barrier); } + return ret; } -static int init_bsp(struct udevice **devp) +/** + * get_bsp() - Get information about the bootstrap processor + * + * @devp: If non-NULL, returns CPU device corresponding to the BSP + * @cpu_countp: If non-NULL, returns the total number of CPUs + * @return CPU number of the BSP, or -ve on error. 
If multiprocessing is not + * enabled, returns 0 + */ +static int get_bsp(struct udevice **devp, int *cpu_countp) { char processor_name[CPU_MAX_NAME_LEN]; + struct udevice *dev; int apic_id; int ret; @@ -411,61 +549,333 @@ static int init_bsp(struct udevice **devp) debug("CPU: %s\n", processor_name); apic_id = lapicid(); - ret = find_cpu_by_apic_id(apic_id, devp); - if (ret) { + ret = find_cpu_by_apic_id(apic_id, &dev); + if (ret < 0) { printf("Cannot find boot CPU, APIC ID %d\n", apic_id); return ret; } + ret = cpu_get_count(dev); + if (ret < 0) + return log_msg_ret("count", ret); + if (devp) + *devp = dev; + if (cpu_countp) + *cpu_countp = ret; + + return dev->req_seq >= 0 ? dev->req_seq : 0; +} + +/** + * read_callback() - Read the pointer in a callback slot + * + * This is called by APs to read their callback slot to see if there is a + * pointer to new instructions, and by the BSP to see if an AP has finished + * + * @slot: Pointer to the AP's callback slot + * @return value of that pointer + */ +static struct mp_callback *read_callback(struct mp_callback **slot) +{ + dmb(); + + return *slot; +} + +/** + * store_callback() - Store a pointer to the callback slot + * + * This is called by the BSP to post work in an AP's slot, and by APs to write + * NULL into that slot when they have finished the work requested by the BSP. + * + * @slot: Pointer to the AP's callback slot + * @val: Value to write (e.g. NULL) + */ +static void store_callback(struct mp_callback **slot, struct mp_callback *val) +{ + *slot = val; + dmb(); +} + +/** + * run_ap_work() - Run a callback on selected APs + * + * This writes @callback to all APs and waits for them all to acknowledge it. + * Note that whether each AP actually calls the callback depends on the value + * of logical_cpu_number (see struct mp_callback). The logical CPU number is + * the CPU device's req_seq value. 
+ * + * @callback: Callback information to pass to all APs + * @bsp: CPU device for the BSP + * @num_cpus: The number of CPUs in the system (= number of APs + 1) + * @expire_ms: Timeout to wait for all APs to finish, in milliseconds, or 0 for + * no timeout + * @return 0 if OK, -ETIMEDOUT if one or more APs failed to respond in time + */ +static int run_ap_work(struct mp_callback *callback, struct udevice *bsp, + int num_cpus, uint expire_ms) +{ + int cur_cpu = bsp->req_seq; + int num_aps = num_cpus - 1; /* number of non-BSPs to get this message */ + int cpus_accepted; + ulong start; + int i; + + if (!IS_ENABLED(CONFIG_SMP_AP_WORK)) { + printf("APs already parked. CONFIG_SMP_AP_WORK not enabled\n"); + return -ENOTSUPP; + } + + /* Signal to all the APs to run the func. */ + for (i = 0; i < num_cpus; i++) { + if (cur_cpu != i) + store_callback(&ap_callbacks[i], callback); + } + mfence(); + + /* Wait for all the APs to signal back that call has been accepted. */ + start = get_timer(0); + + do { + mdelay(1); + cpus_accepted = 0; + + for (i = 0; i < num_cpus; i++) { + if (cur_cpu == i) + continue; + if (!read_callback(&ap_callbacks[i])) + cpus_accepted++; + } + + if (expire_ms && get_timer(start) >= expire_ms) { + log(UCLASS_CPU, LOGL_CRIT, + "AP call expired; %d/%d CPUs accepted\n", + cpus_accepted, num_aps); + return -ETIMEDOUT; + } + } while (cpus_accepted != num_aps); + + /* Make sure we can see any data written by the APs */ + mfence(); + + return 0; +} + +/** + * ap_wait_for_instruction() - Wait for and process requests from the main CPU + * + * This is called by APs (here, everything other than the main boot CPU) to + * await instructions. They arrive in the form of a function call and argument, + * which is then called. 
This uses a simple mailbox with atomic read/set + * + * @cpu: CPU that is waiting + * @unused: Optional argument provided by struct mp_flight_record, not used here + * @return Does not return + */ +static int ap_wait_for_instruction(struct udevice *cpu, void *unused) +{ + struct mp_callback lcb; + struct mp_callback **per_cpu_slot; + + if (!IS_ENABLED(CONFIG_SMP_AP_WORK)) + return 0; + + per_cpu_slot = &ap_callbacks[cpu->req_seq]; + + while (1) { + struct mp_callback *cb = read_callback(per_cpu_slot); + + if (!cb) { + asm ("pause"); + continue; + } + + /* Copy to local variable before using the value */ + memcpy(&lcb, cb, sizeof(lcb)); + mfence(); + if (lcb.logical_cpu_number == MP_SELECT_ALL || + lcb.logical_cpu_number == MP_SELECT_APS || + cpu->req_seq == lcb.logical_cpu_number) + lcb.func(lcb.arg); + + /* Indicate we are finished */ + store_callback(per_cpu_slot, NULL); + } return 0; } -int mp_init(struct mp_params *p) +static int mp_init_cpu(struct udevice *cpu, void *unused) { - int num_aps; - atomic_t *ap_count; - struct udevice *cpu; + struct cpu_platdata *plat = dev_get_parent_platdata(cpu); + + plat->ucode_version = microcode_read_rev(); + plat->device_id = gd->arch.x86_device; + + return device_probe(cpu); +} + +static struct mp_flight_record mp_steps[] = { + MP_FR_BLOCK_APS(mp_init_cpu, NULL, mp_init_cpu, NULL), + MP_FR_BLOCK_APS(ap_wait_for_instruction, NULL, NULL, NULL), +}; + +int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg) +{ + struct mp_callback lcb = { + .func = func, + .arg = arg, + .logical_cpu_number = cpu_select, + }; + struct udevice *dev; + int num_cpus; int ret; - /* This will cause the CPUs devices to be bound */ - struct uclass *uc; - ret = uclass_get(UCLASS_CPU, &uc); + ret = get_bsp(&dev, &num_cpus); + if (ret < 0) + return log_msg_ret("bsp", ret); + if (cpu_select == MP_SELECT_ALL || cpu_select == MP_SELECT_BSP || + cpu_select == ret) { + /* Run on BSP first */ + func(arg); + } + + if (!IS_ENABLED(CONFIG_SMP_AP_WORK) || 
+ !(gd->flags & GD_FLG_SMP_READY)) { + /* Allow use of this function on the BSP only */ + if (cpu_select == MP_SELECT_BSP || !cpu_select) + return 0; + return -ENOTSUPP; + } + + /* Allow up to 1 second for all APs to finish */ + ret = run_ap_work(&lcb, dev, num_cpus, 1000 /* ms */); if (ret) + return log_msg_ret("aps", ret); + + return 0; +} + +static void park_this_cpu(void *unused) +{ + stop_this_cpu(); +} + +int mp_park_aps(void) +{ + int ret; + + ret = mp_run_on_cpus(MP_SELECT_APS, park_this_cpu, NULL); + if (ret) + return log_ret(ret); + + return 0; +} + +int mp_first_cpu(int cpu_select) +{ + struct udevice *dev; + int num_cpus; + int ret; + + /* + * This assumes that CPUs are numbered from 0. This function tries to + * avoid assuming the CPU 0 is the boot CPU + */ + if (cpu_select == MP_SELECT_ALL) + return 0; /* start with the first one */ + + ret = get_bsp(&dev, &num_cpus); + if (ret < 0) + return log_msg_ret("bsp", ret); + + /* Return boot CPU if requested */ + if (cpu_select == MP_SELECT_BSP) return ret; + /* Return something other than the boot CPU, if APs requested */ + if (cpu_select == MP_SELECT_APS && num_cpus > 1) + return ret == 0 ? 
1 : 0; + + /* Try to check for an invalid value */ + if (cpu_select < 0 || cpu_select >= num_cpus) + return -EINVAL; + + return cpu_select; /* return the only selected one */ +} + +int mp_next_cpu(int cpu_select, int prev_cpu) +{ + struct udevice *dev; + int num_cpus; + int ret; + int bsp; + + /* If we selected the BSP or a particular single CPU, we are done */ + if (!IS_ENABLED(CONFIG_SMP_AP_WORK) || cpu_select == MP_SELECT_BSP || + cpu_select >= 0) + return -EFBIG; + + /* Must be doing MP_SELECT_ALL or MP_SELECT_APS; return the next CPU */ + ret = get_bsp(&dev, &num_cpus); + if (ret < 0) + return log_msg_ret("bsp", ret); + bsp = ret; + + /* Move to the next CPU */ + assert(prev_cpu >= 0); + ret = prev_cpu + 1; + + /* Skip the BSP if needed */ + if (cpu_select == MP_SELECT_APS && ret == bsp) + ret++; + if (ret >= num_cpus) + return -EFBIG; + + return ret; +} + +int mp_init(void) +{ + int num_aps, num_cpus; + atomic_t *ap_count; + struct udevice *cpu; + struct uclass *uc; + int ret; + if (IS_ENABLED(CONFIG_QFW)) { ret = qemu_cpu_fixup(); if (ret) return ret; } - ret = init_bsp(&cpu); - if (ret) { + /* + * Multiple APs are brought up simultaneously and they may get the same + * seq num in the uclass_resolve_seq() during device_probe(). To avoid + * this, set req_seq to the reg number in the device tree in advance. + */ + uclass_id_foreach_dev(UCLASS_CPU, cpu, uc) + cpu->req_seq = dev_read_u32_default(cpu, "reg", -1); + + ret = get_bsp(&cpu, &num_cpus); + if (ret < 0) { debug("Cannot init boot CPU: err=%d\n", ret); return ret; } - if (p == NULL || p->flight_plan == NULL || p->num_records < 1) { - printf("Invalid MP parameters\n"); - return -EINVAL; - } - - num_cpus = cpu_get_count(cpu); - if (num_cpus < 0) { - debug("Cannot get number of CPUs: err=%d\n", num_cpus); - return num_cpus; - } - if (num_cpus < 2) debug("Warning: Only 1 CPU is detected\n"); ret = check_cpu_devices(num_cpus); if (ret) - debug("Warning: Device tree does not describe all CPUs. 
Extra ones will not be started correctly\n"); + log_warning("Warning: Device tree does not describe all CPUs. Extra ones will not be started correctly\n"); + + ap_callbacks = calloc(num_cpus, sizeof(struct mp_callback *)); + if (!ap_callbacks) + return -ENOMEM; /* Copy needed parameters so that APs have a reference to the plan */ - mp_info.num_records = p->num_records; - mp_info.records = p->flight_plan; + mp_info.num_records = ARRAY_SIZE(mp_steps); + mp_info.records = mp_steps; /* Load the SIPI vector */ ret = load_sipi_vector(&ap_count, num_cpus); @@ -489,28 +899,12 @@ int mp_init(struct mp_params *p) } /* Walk the flight plan for the BSP */ - ret = bsp_do_flight_plan(cpu, p); + ret = bsp_do_flight_plan(cpu, &mp_info, num_aps); if (ret) { debug("CPU init failed: err=%d\n", ret); return ret; } + gd->flags |= GD_FLG_SMP_READY; return 0; } - -int mp_init_cpu(struct udevice *cpu, void *unused) -{ - struct cpu_platdata *plat = dev_get_parent_platdata(cpu); - - /* - * Multiple APs are brought up simultaneously and they may get the same - * seq num in the uclass_resolve_seq() during device_probe(). To avoid - * this, set req_seq to the reg number in the device tree in advance. 
- */ - cpu->req_seq = fdtdec_get_int(gd->fdt_blob, dev_of_offset(cpu), "reg", - -1); - plat->ucode_version = microcode_read_rev(); - plat->device_id = gd->arch.x86_device; - - return device_probe(cpu); -} diff --git a/arch/x86/cpu/mtrr.c b/arch/x86/cpu/mtrr.c index 7ec0733337..2468d88a80 100644 --- a/arch/x86/cpu/mtrr.c +++ b/arch/x86/cpu/mtrr.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -63,10 +64,71 @@ static void set_var_mtrr(uint reg, uint type, uint64_t start, uint64_t size) wrmsrl(MTRR_PHYS_MASK_MSR(reg), mask | MTRR_PHYS_MASK_VALID); } +void mtrr_read_all(struct mtrr_info *info) +{ + int i; + + for (i = 0; i < MTRR_COUNT; i++) { + info->mtrr[i].base = native_read_msr(MTRR_PHYS_BASE_MSR(i)); + info->mtrr[i].mask = native_read_msr(MTRR_PHYS_MASK_MSR(i)); + } +} + +void mtrr_write_all(struct mtrr_info *info) +{ + struct mtrr_state state; + int i; + + for (i = 0; i < MTRR_COUNT; i++) { + mtrr_open(&state, true); + wrmsrl(MTRR_PHYS_BASE_MSR(i), info->mtrr[i].base); + wrmsrl(MTRR_PHYS_MASK_MSR(i), info->mtrr[i].mask); + mtrr_close(&state, true); + } +} + +static void write_mtrrs(void *arg) +{ + struct mtrr_info *info = arg; + + mtrr_write_all(info); +} + +static void read_mtrrs(void *arg) +{ + struct mtrr_info *info = arg; + + mtrr_read_all(info); +} + +/** + * mtrr_copy_to_aps() - Copy the MTRRs from the boot CPU to other CPUs + * + * @return 0 on success, -ve on failure + */ +static int mtrr_copy_to_aps(void) +{ + struct mtrr_info info; + int ret; + + ret = mp_run_on_cpus(MP_SELECT_BSP, read_mtrrs, &info); + if (ret == -ENXIO) + return 0; + else if (ret) + return log_msg_ret("bsp", ret); + + ret = mp_run_on_cpus(MP_SELECT_APS, write_mtrrs, &info); + if (ret) + return log_msg_ret("bsp", ret); + + return 0; +} + int mtrr_commit(bool do_caches) { struct mtrr_request *req = gd->arch.mtrr_req; struct mtrr_state state; + int ret; int i; debug("%s: enabled=%d, count=%d\n", __func__, gd->arch.has_mtrr, @@ -88,6 +150,12 @@ int 
mtrr_commit(bool do_caches) mtrr_close(&state, do_caches); debug("mtrr done\n"); + if (gd->flags & GD_FLG_RELOC) { + ret = mtrr_copy_to_aps(); + if (ret) + return log_msg_ret("copy", ret); + } + return 0; } @@ -153,3 +221,84 @@ int mtrr_set_next_var(uint type, uint64_t start, uint64_t size) return 0; } + +/** enum mtrr_opcode - supported operations for mtrr_do_oper() */ +enum mtrr_opcode { + MTRR_OP_SET, + MTRR_OP_SET_VALID, +}; + +/** + * struct mtrr_oper - An MTRR operation to perform on a CPU + * + * @opcode: Indicates operation to perform + * @reg: MTRR reg number to select (0-7, -1 = all) + * @valid: Valid value to write for MTRR_OP_SET_VALID + * @base: Base value to write for MTRR_OP_SET + * @mask: Mask value to write for MTRR_OP_SET + */ +struct mtrr_oper { + enum mtrr_opcode opcode; + int reg; + bool valid; + u64 base; + u64 mask; +}; + +static void mtrr_do_oper(void *arg) +{ + struct mtrr_oper *oper = arg; + u64 mask; + + switch (oper->opcode) { + case MTRR_OP_SET_VALID: + mask = native_read_msr(MTRR_PHYS_MASK_MSR(oper->reg)); + if (oper->valid) + mask |= MTRR_PHYS_MASK_VALID; + else + mask &= ~MTRR_PHYS_MASK_VALID; + wrmsrl(MTRR_PHYS_MASK_MSR(oper->reg), mask); + break; + case MTRR_OP_SET: + wrmsrl(MTRR_PHYS_BASE_MSR(oper->reg), oper->base); + wrmsrl(MTRR_PHYS_MASK_MSR(oper->reg), oper->mask); + break; + } +} + +static int mtrr_start_op(int cpu_select, struct mtrr_oper *oper) +{ + struct mtrr_state state; + int ret; + + mtrr_open(&state, true); + ret = mp_run_on_cpus(cpu_select, mtrr_do_oper, oper); + mtrr_close(&state, true); + if (ret) + return log_msg_ret("run", ret); + + return 0; +} + +int mtrr_set_valid(int cpu_select, int reg, bool valid) +{ + struct mtrr_oper oper; + + oper.opcode = MTRR_OP_SET_VALID; + oper.reg = reg; + oper.valid = valid; + + return mtrr_start_op(cpu_select, &oper); +} + +int mtrr_set(int cpu_select, int reg, u64 base, u64 mask) +{ + struct mtrr_oper oper; + + oper.opcode = MTRR_OP_SET; + oper.reg = reg; + oper.base = base; + 
oper.mask = mask; + + return mtrr_start_op(cpu_select, &oper); +} diff --git a/arch/x86/include/asm/mp.h b/arch/x86/include/asm/mp.h index 9dddf88b5a..5f9b8c6564 100644 --- a/arch/x86/include/asm/mp.h +++ b/arch/x86/include/asm/mp.h @@ -11,6 +11,17 @@ #include #include +enum { + /* Indicates that the function should run on all CPUs */ + MP_SELECT_ALL = -1, + + /* Run on boot CPUs */ + MP_SELECT_BSP = -2, + + /* Run on non-boot CPUs */ + MP_SELECT_APS = -3, +}; + typedef int (*mp_callback_t)(struct udevice *cpu, void *arg); /* @@ -25,6 +36,14 @@ typedef int (*mp_callback_t)(struct udevice *cpu, void *arg); * * Note that ap_call() and bsp_call() can be NULL. In the NULL case the * callback will just not be called. + * + * @barrier: Ensures that the BSP and AP don't run the flight record at the same + * time + * @cpus_entered: Counts the number of APs that have run this record + * @ap_call: Function for the APs to call + * @ap_arg: Argument to pass to @ap_call + * @bsp_call: Function for the BSP to call + * @bsp_arg: Argument to pass to @bsp_call */ struct mp_flight_record { atomic_t barrier; @@ -51,21 +70,6 @@ struct mp_flight_record { #define MP_FR_NOBLOCK_APS(ap_func, ap_arg, bsp_func, bsp_arg) \ MP_FLIGHT_RECORD(1, ap_func, ap_arg, bsp_func, bsp_arg) -/* - * The mp_params structure provides the arguments to the mp subsystem - * for bringing up APs. - * - * At present this is overkill for U-Boot, but it may make it easier to add - * SMM support. - */ -struct mp_params { - int parallel_microcode_load; - const void *microcode_pointer; - /* Flight plan for APs and BSP */ - struct mp_flight_record *flight_plan; - int num_records; -}; - /* * mp_init() will set up the SIPI vector and bring up the APs according to * mp_params. Each flight record will be executed according to the plan. Note @@ -85,12 +89,105 @@ struct mp_params { * * mp_init() returns < 0 on error, 0 on success. 
*/ -int mp_init(struct mp_params *params); +int mp_init(void); -/* Probes the CPU device */ -int mp_init_cpu(struct udevice *cpu, void *unused); - -/* Set up additional CPUs */ +/** + * x86_mp_init() - Set up additional CPUs + * + * @returns < 0 on error, 0 on success. + */ int x86_mp_init(void); +/** + * mp_run_func() - Function to call on the AP + * + * @arg: Argument to pass + */ +typedef void (*mp_run_func)(void *arg); + +#if CONFIG_IS_ENABLED(SMP) && !CONFIG_IS_ENABLED(X86_64) +/** + * mp_run_on_cpus() - Run a function on one or all CPUs + * + * This does not return until all CPUs have completed the work + * + * Running on anything other than the boot CPU is only supported if + * CONFIG_SMP_AP_WORK is enabled + * + * @cpu_select: CPU to run on (its dev->req_seq value), or MP_SELECT_ALL for + * all, or MP_SELECT_BSP for BSP + * @func: Function to run + * @arg: Argument to pass to the function + * @return 0 on success, -ve on error + */ +int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg); + +/** + * mp_park_aps() - Park the APs ready for the OS + * + * This halts all CPUs except the main one, ready for the OS to use them + * + * @return 0 if OK, -ve on error + */ +int mp_park_aps(void); + +/** + * mp_first_cpu() - Get the first CPU to process, from a selection + * + * This is used to iterate through selected CPUs. Call this function first, then + * call mp_next_cpu() repeatedly (with the same @cpu_select) until it returns + * -EFBIG. + * + * @cpu_select: Selected CPUs (either a CPU number or MP_SELECT_...) + * @return next CPU number to run on (e.g. 0) + */ +int mp_first_cpu(int cpu_select); + +/** + * mp_next_cpu() - Get the next CPU to process, from a selection + * + * This is used to iterate through selected CPUs. After first calling + * mp_first_cpu() once, call this function repeatedly until it returns -EFBIG. 
+ * + * The value of @cpu_select must be the same for all calls and must match the + * value passed to mp_first_cpu(), otherwise the behaviour is undefined. + * + * @cpu_select: Selected CPUs (either a CPU number or MP_SELECT_...) + * @prev_cpu: Previous value returned by mp_first_cpu()/mp_next_cpu() + * @return next CPU number to run on (e.g. 0) + */ +int mp_next_cpu(int cpu_select, int prev_cpu); +#else +static inline int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg) +{ + /* There is only one CPU, so just call the function here */ + func(arg); + + return 0; +} + +static inline int mp_park_aps(void) +{ + /* No APs to park */ + + return 0; +} + +static inline int mp_first_cpu(int cpu_select) +{ + /* We cannot run on any APs, nor a selected CPU */ + return cpu_select == MP_SELECT_APS ? -EFBIG : MP_SELECT_BSP; +} + +static inline int mp_next_cpu(int cpu_select, int prev_cpu) +{ + /* + * When MP is not enabled, there is only one CPU and we did it in + * mp_first_cpu() + */ + return -EFBIG; +} + +#endif + #endif /* _X86_MP_H_ */ diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 212a699c1b..48db1dd82f 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h @@ -70,6 +70,26 @@ struct mtrr_state { bool enable_cache; }; +/** + * struct mtrr - Information about a single MTRR + * + * @base: Base address and MTRR_BASE_TYPE_MASK + * @mask: Mask and MTRR_PHYS_MASK_VALID + */ +struct mtrr { + u64 base; + u64 mask; +}; + +/** + * struct mtrr_info - Information about all MTRRs + * + * @mtrr: Information about each mtrr + */ +struct mtrr_info { + struct mtrr mtrr[MTRR_COUNT]; +}; + /** * mtrr_open() - Prepare to adjust MTRRs * @@ -129,6 +149,37 @@ int mtrr_commit(bool do_caches); */ int mtrr_set_next_var(uint type, uint64_t base, uint64_t size); +/** + * mtrr_read_all() - Save all the MTRRs + * + * This reads all MTRRs from the boot CPU into a struct so they can be loaded + * onto other CPUs + * + * @info: Place to put the 
MTRR info + */ +void mtrr_read_all(struct mtrr_info *info); + +/** + * mtrr_set_valid() - Set the valid flag for a selected MTRR and CPU(s) + * + * @cpu_select: Selected CPUs (either a CPU number or MP_SELECT_...) + * @reg: MTRR register to write (0-7) + * @valid: Valid flag to write + * @return 0 on success, -ve on error + */ +int mtrr_set_valid(int cpu_select, int reg, bool valid); + +/** + * mtrr_set() - Set the base address and mask for a selected MTRR and CPU(s) + * + * @cpu_select: Selected CPUs (either a CPU number or MP_SELECT_...) + * @reg: MTRR register to write (0-7) + * @base: Base address and MTRR_BASE_TYPE_MASK + * @mask: Mask and MTRR_PHYS_MASK_VALID + * @return 0 on success, -ve on error + */ +int mtrr_set(int cpu_select, int reg, u64 base, u64 mask); + #endif #if ((CONFIG_XIP_ROM_SIZE & (CONFIG_XIP_ROM_SIZE - 1)) != 0) diff --git a/cmd/x86/mtrr.c b/cmd/x86/mtrr.c index 084d7315f4..e118bba5a2 100644 --- a/cmd/x86/mtrr.c +++ b/cmd/x86/mtrr.c @@ -5,7 +5,9 @@ #include #include +#include #include +#include #include static const char *const mtrr_type_name[MTRR_TYPE_COUNT] = { @@ -18,19 +20,32 @@ static const char *const mtrr_type_name[MTRR_TYPE_COUNT] = { "Back", }; -static int do_mtrr_list(void) +static void read_mtrrs(void *arg) { + struct mtrr_info *info = arg; + + mtrr_read_all(info); +} + +static int do_mtrr_list(int cpu_select) +{ + struct mtrr_info info; + int ret; int i; printf("Reg Valid Write-type %-16s %-16s %-16s\n", "Base ||", "Mask ||", "Size ||"); + memset(&info, '\0', sizeof(info)); + ret = mp_run_on_cpus(cpu_select, read_mtrrs, &info); + if (ret) + return log_msg_ret("run", ret); for (i = 0; i < MTRR_COUNT; i++) { const char *type = "Invalid"; uint64_t base, mask, size; bool valid; - base = native_read_msr(MTRR_PHYS_BASE_MSR(i)); - mask = native_read_msr(MTRR_PHYS_MASK_MSR(i)); + base = info.mtrr[i].base; + mask = info.mtrr[i].mask; size = ~mask & ((1ULL << CONFIG_CPU_ADDR_BITS) - 1); size |= (1 << 12) - 1; size += 1; @@ -44,14 +59,14 @@ static int
do_mtrr_list(void) return 0; } -static int do_mtrr_set(uint reg, int argc, char *const argv[]) +static int do_mtrr_set(int cpu_select, uint reg, int argc, char *const argv[]) { const char *typename = argv[0]; - struct mtrr_state state; uint32_t start, size; uint64_t base, mask; int i, type = -1; bool valid; + int ret; if (argc < 3) return CMD_RET_USAGE; @@ -73,27 +88,9 @@ static int do_mtrr_set(uint reg, int argc, char *const argv[]) if (valid) mask |= MTRR_PHYS_MASK_VALID; - mtrr_open(&state, true); - wrmsrl(MTRR_PHYS_BASE_MSR(reg), base); - wrmsrl(MTRR_PHYS_MASK_MSR(reg), mask); - mtrr_close(&state, true); - - return 0; -} - -static int mtrr_set_valid(int reg, bool valid) -{ - struct mtrr_state state; - uint64_t mask; - - mtrr_open(&state, true); - mask = native_read_msr(MTRR_PHYS_MASK_MSR(reg)); - if (valid) - mask |= MTRR_PHYS_MASK_VALID; - else - mask &= ~MTRR_PHYS_MASK_VALID; - wrmsrl(MTRR_PHYS_MASK_MSR(reg), mask); - mtrr_close(&state, true); + ret = mtrr_set(cpu_select, reg, base, mask); + if (ret) + return CMD_RET_FAILURE; return 0; } @@ -101,39 +98,92 @@ static int mtrr_set_valid(int reg, bool valid) static int do_mtrr(struct cmd_tbl *cmdtp, int flag, int argc, char *const argv[]) { - const char *cmd; + int cmd; + int cpu_select; uint reg; + int ret; - cmd = argv[1]; - if (argc < 2 || *cmd == 'l') - return do_mtrr_list(); - argc -= 2; - argv += 2; - if (argc <= 0) - return CMD_RET_USAGE; - reg = simple_strtoul(argv[0], NULL, 16); - if (reg >= MTRR_COUNT) { - printf("Invalid register number\n"); - return CMD_RET_USAGE; + cpu_select = MP_SELECT_BSP; + if (argc >= 3 && !strcmp("-c", argv[1])) { + const char *cpustr; + + cpustr = argv[2]; + if (*cpustr == 'a') + cpu_select = MP_SELECT_ALL; + else + cpu_select = simple_strtol(cpustr, NULL, 16); + argc -= 2; + argv += 2; + } + argc--; + argv++; + cmd = argv[0] ? 
*argv[0] : 0; + if (argc < 1 || !cmd) { + cmd = 'l'; + reg = 0; + } else { + if (argc < 2) + return CMD_RET_USAGE; + reg = simple_strtoul(argv[1], NULL, 16); + if (reg >= MTRR_COUNT) { + printf("Invalid register number\n"); + return CMD_RET_USAGE; + } + } + if (cmd == 'l') { + bool first; + int i; + + i = mp_first_cpu(cpu_select); + if (i < 0) { + printf("Invalid CPU (err=%d)\n", i); + return CMD_RET_FAILURE; + } + first = true; + for (; i >= 0; i = mp_next_cpu(cpu_select, i)) { + if (!first) + printf("\n"); + printf("CPU %d:\n", i); + ret = do_mtrr_list(i); + if (ret) { + printf("Failed to read CPU %d (err=%d)\n", i, + ret); + return CMD_RET_FAILURE; + } + first = false; + } + } else { + switch (cmd) { + case 'e': + ret = mtrr_set_valid(cpu_select, reg, true); + break; + case 'd': + ret = mtrr_set_valid(cpu_select, reg, false); + break; + case 's': + ret = do_mtrr_set(cpu_select, reg, argc - 2, argv + 2); + break; + default: + return CMD_RET_USAGE; + } + if (ret) { + printf("Operation failed (err=%d)\n", ret); + return CMD_RET_FAILURE; + } } - if (*cmd == 'e') - return mtrr_set_valid(reg, true); - else if (*cmd == 'd') - return mtrr_set_valid(reg, false); - else if (*cmd == 's') - return do_mtrr_set(reg, argc - 1, argv + 1); - else - return CMD_RET_USAGE; return 0; } U_BOOT_CMD( - mtrr, 6, 1, do_mtrr, + mtrr, 8, 1, do_mtrr, "Use x86 memory type range registers (32-bit only)", "[list] - list current registers\n" "set - set a register\n" "\t is Uncacheable, Combine, Through, Protect, Back\n" "disable - disable a register\n" - "ensable - enable a register" + "enable - enable a register\n" + "\n" + "Precede command with '-c |all' to access a particular hex CPU, e.g.\n" + " mtrr -c all list; mtrr -c 2e list" ); diff --git a/doc/board/google/chromebook_coral.rst b/doc/board/google/chromebook_coral.rst index 40bd9397d4..c39f1e310c 100644 --- a/doc/board/google/chromebook_coral.rst +++ b/doc/board/google/chromebook_coral.rst @@ -188,6 +188,7 @@ Partial memory map fef00000 
1000 CONFIG_BOOTSTAGE_STASH_ADDR fef00000 Base of CAR region + 30000 AP_DEFAULT_BASE (used to start up additional CPUs) f0000 CONFIG_ROM_TABLE_ADDR 120000 BSS (defined in u-boot-spl.lds) 200000 FSP-S (which is run after U-Boot is relocated) diff --git a/drivers/core/acpi.c b/drivers/core/acpi.c index cdbc2c5cf5..7fe93992b5 100644 --- a/drivers/core/acpi.c +++ b/drivers/core/acpi.c @@ -195,7 +195,7 @@ static int sort_acpi_item_type(struct acpi_ctx *ctx, void *start, "u-boot,acpi-dsdt-order" : "u-boot,acpi-ssdt-order", &size); if (!order) { - log_warning("Failed to find ordering, leaving as is\n"); + log_debug("Failed to find ordering, leaving as is\n"); return 0; } diff --git a/include/asm-generic/global_data.h b/include/asm-generic/global_data.h index 8c78792cc9..d4a4e2215d 100644 --- a/include/asm-generic/global_data.h +++ b/include/asm-generic/global_data.h @@ -167,5 +167,6 @@ typedef struct global_data { #define GD_FLG_LOG_READY 0x08000 /* Log system is ready for use */ #define GD_FLG_WDT_READY 0x10000 /* Watchdog is ready for use */ #define GD_FLG_SKIP_LL_INIT 0x20000 /* Don't perform low-level init */ +#define GD_FLG_SMP_READY 0x40000 /* SMP init is complete */ #endif /* __ASM_GENERIC_GBL_DATA_H */