[U-Boot] [PATCH v3 6/6] PPC 85xx: Add qemu-ppce500 machine

Thu Feb 20 13:34:29 CET 2014

On 19.02.2014, at 01:03, Scott Wood <scottwood at freescale.com> wrote:

> On Tue, 2014-02-11 at 01:10 +0100, Alexander Graf wrote:
>> diff --git a/arch/powerpc/cpu/mpc85xx/start.S b/arch/powerpc/cpu/mpc85xx/start.S
>> index bb0025c..8982c78 100644
>> --- a/arch/powerpc/cpu/mpc85xx/start.S
>> +++ b/arch/powerpc/cpu/mpc85xx/start.S
>> @@ -80,6 +80,11 @@ _start_e500:
>> 	li	r1,MSR_DE
>> 	mtmsr 	r1
>> 
>> +#ifdef CONFIG_QEMU_E500
>> +	/* Save our ePAPR device tree pointer before we clobber it */
>> +	mr	r24, r3
>> +#endif
> 
> FWIW, it should be harmless to do this unconditionally (though in that
> case I'd insert "(if we have one)" in the comment.

Sure.

> 
>> #ifdef CONFIG_SYS_FSL_ERRATUM_A004510
>> 	mfspr	r3,SPRN_SVR
>> 	rlwinm	r3,r3,0,0xff
>> @@ -514,6 +519,7 @@ nexti:	mflr	r1		/* R1 = our PC */
>>  * As a general rule, TLB0 is used for short-term TLBs, and TLB1 is used for
>>  * long-term TLBs, so we use TLB0 here.
>>  */
>> +#if !defined(CONFIG_DYNAMIC_CCSRBAR)
>> #if (CONFIG_SYS_CCSRBAR_DEFAULT != CONFIG_SYS_CCSRBAR_PHYS)
> 
> This shouldn't be necessary, if you have
> CONFIG_SYS_CCSR_DO_NOT_RELOCATE.

You're right :).

> 
>> diff --git a/arch/powerpc/cpu/mpc85xx/tlb.c b/arch/powerpc/cpu/mpc85xx/tlb.c
>> index 2011fb8..0e0b483 100644
>> --- a/arch/powerpc/cpu/mpc85xx/tlb.c
>> +++ b/arch/powerpc/cpu/mpc85xx/tlb.c
>> @@ -36,6 +36,10 @@ void init_tlbs(void)
>> 			  tlb_table[i].mas7);
>> 	}
>> 
>> +#ifdef CONFIG_SYS_USE_DYNAMIC_TLBS
>> +	init_tlbs_dynamic();
>> +#endif
> 
> You could avoid the ifdef by moving a stub implementation into the
> header -- or possibly better, avoid the config symbol entirely by using
> a weak symbol.
> 
> Better still, make init_tlbs() weak.  Then you don't need a config
> symbol, a new function name, or a dummy tlb_table[] -- you just redefine
> init_tlbs() in board code.

Nice idea. That radically simplifies the code.

> 
>> diff --git a/board/freescale/qemu-ppce500/qemu-ppce500.c b/board/freescale/qemu-ppce500/qemu-ppce500.c
>> new file mode 100644
>> index 0000000..fc546b9
>> --- /dev/null
>> +++ b/board/freescale/qemu-ppce500/qemu-ppce500.c
>> @@ -0,0 +1,334 @@
>> +/*
>> + * Copyright 2007,2009-2014 Freescale Semiconductor, Inc.
>> + *
>> + * SPDX-License-Identifier:	GPL-2.0+
>> + */
>> +
>> +#include <common.h>
>> +#include <command.h>
>> +#include <pci.h>
>> +#include <asm/processor.h>
>> +#include <asm/mmu.h>
>> +#include <asm/fsl_pci.h>
>> +#include <asm/io.h>
>> +#include <libfdt.h>
>> +#include <fdt_support.h>
>> +#include <netdev.h>
>> +#include <fdtdec.h>
>> +#include <errno.h>
>> +#include <malloc.h>
>> +
>> +DECLARE_GLOBAL_DATA_PTR;
>> +
>> +static void *get_fdt(void)
>> +{
>> +	return (void *)gd->fdt_blob;
>> +}
> 
> Does this return virtual or physical?

I've split this into a get_fdt_virt() and get_fdt_phys() function to make this more explicit.

> 
>> +
>> +uint64_t get_phys_ccsrbar_addr_early(void)
>> +{
>> +	u32 mas0, mas1, mas2, mas3, mas7;
>> +	ulong fdt = (ulong)get_fdt();
>> +	uint64_t r;
>> +
>> +	/*
>> +	 * To be able to read the FDT we need to create a temporary TLB
>> +	 * map for it.
>> +	 */
>> +
>> +	mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(10);
>> +	mas1 = MAS1_VALID | MAS1_TID(0) | MAS1_TS | MAS1_TSIZE(BOOKE_PAGESZ_1M);
>> +	mas2 = FSL_BOOKE_MAS2(fdt, 0);
>> +	mas3 = FSL_BOOKE_MAS3(fdt, 0, MAS3_SW|MAS3_SR);
>> +	mas7 = FSL_BOOKE_MAS7(fdt);
> 
> Don't use the physical address as a virtual address.  Use a known-good
> virtual address.  Using unknown physical addresses as virtual addresses
> is generally a bad idea (it's different for stuff whose physical address
> is hardcoded in U-Boot), but it's particularly bad here because you'll
> create an overlapping TLB entry if the fdt happens to live within your
> initial memory mapping.

Ok.

> 
> OK, I see you use CONFIG_SYS_TMPVIRT for this elsewhere, but I guess
> forgot to use it here (why is fdt mapping code duplicated?).

I've folded the AS=1 mapping functions into a single function and used tlb_map_range for the AS=0 one.

> 
>> +void pci_init_board(void)
>> +{
>> +	struct pci_controller *pci_hoses;
>> +	void *fdt = get_fdt();
>> +	int pci_node;
>> +	int pci_num = 0;
>> +	int pci_count = 0;
>> +	const char *compat = "fsl,mpc8540-pci";
>> +	ulong map_addr;
> 
> May want to look for arbitrary PCI host controllers (device_type would
> be simplest), in case the QEMU machine ever gets an upgrade to PCIe.

Ok.

> 
>> +
>> +	puts("\n");
>> +
>> +	/* Start MMIO and PIO range maps above RAM */
>> +	map_addr = CONFIG_MAX_MEM_MAPPED;
> 
> It'd be better to hardcode virtual addresses for this (as other boards
> do), and limit the size you map to the smaller of the hardcoded size or
> the device tree size.

I don't understand this comment. CONFIG_MAX_MEM_MAPPED is basically the first address available to IO maps, so with this it is properly hardcoded and ensured to always map IO to the same physical address regardless of memory passed in.

> 
>> +	/* Count and allocate PCI buses */
>> +	pci_node = fdt_node_offset_by_compatible(fdt, -1, compat);
>> +	while (pci_node != -FDT_ERR_NOTFOUND) {
>> +		pci_node = fdt_node_offset_by_compatible(fdt, pci_node, compat);
>> +		pci_count++;
>> +	}
>> +
>> +	if (pci_count) {
>> +		pci_hoses = malloc(sizeof(struct pci_controller) * pci_count);
>> +	} else {
>> +		printf("PCI: disabled\n\n");
>> +		return;
>> +	}
>> +
>> +	/* Spawn PCI buses based on device tree */
>> +	pci_node = fdt_node_offset_by_compatible(fdt, -1, compat);
>> +	while (pci_node != -FDT_ERR_NOTFOUND) {
>> +		struct fsl_pci_info pci_info = { };
>> +		const fdt32_t *reg;
>> +		int r;
>> +
>> +		reg = fdt_getprop(fdt, pci_node, "reg", NULL);
>> +		pci_info.regs = fdt_translate_address((void *)fdt, pci_node, reg);
> 
> Unnecessary cast.

Removed.

> 
>> +void init_tlbs_dynamic(void)
>> +{
>> +	unsigned long fdt_phys = (unsigned long)get_fdt();
>> +	unsigned long fdt_phys_tlb = fdt_phys & ~0xffffful;
>> +	unsigned long fdt_virt_tlb = CONFIG_SYS_TMPVIRT;
>> +	unsigned long fdt_virt = fdt_virt_tlb + (fdt_phys & 0xffffful);
>> +	u32 mas0, mas1, mas2, mas3, mas7;
>> +	phys_size_t ram_size;
>> +
>> +	/*
>> +	 * Create a temporary AS=1 map for the fdt
>> +	 *
>> +	 * We use ESEL=0 here to overwrite the previous AS=0 map for ourselves
>> +	 * which was only 4k big. This way we don't have to clear any other maps.
>> +	 */
> 
> I don't think it's generally safe to assume that this entry is ESEL 0 --
> though I'm wondering if the current TLB code is assuming that (or at
> least, assuming that whatever entry is used gets overwritten by the TLB
> table).

That's at least the way I understand the current code :). There's definitely no code anywhere that removes the 4k map:

  0x0000000000f00000 0x0000000000f00000   4K 0     0  SRWXURWX ----- U----

> 
>> +	mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(0);
>> +	mas1 = MAS1_VALID | MAS1_TID(0) | MAS1_TS | MAS1_TSIZE(BOOKE_PAGESZ_1M);
>> +	mas2 = FSL_BOOKE_MAS2(fdt_virt_tlb, 0);
>> +	mas3 = FSL_BOOKE_MAS3(fdt_phys_tlb, 0, MAS3_SW|MAS3_SR);
>> +	mas7 = FSL_BOOKE_MAS7(fdt_phys_tlb);
> 
> What if the fdt straddles a 1M boundary?

Then we fix the hypervisor ;). Even the 1MB is only an approximation. We don't know the size of the fdt. But I think we can expect the hypervisor to align it on 1MB. The masks here really are just to be nice to a hypervisor if it's broken (or knows exactly what it's doing).

> 
>> +	write_tlb(mas0, mas1, mas2, mas3, mas7);
>> +	gd->fdt_blob = (void *)fdt_virt;
>> +
>> +	/* Fetch RAM size from the fdt */
>> +	ram_size = fixed_sdram();
> 
> Why not just call get_linear_ram_size() directly?

*shrug*. Fixed :).

> 
>> +	/* And remove our fdt map again */
>> +	gd->fdt_blob = (void *)fdt_phys;
>> +	disable_tlb(0);
>> +
>> +	/* Create a dynamic AS=0 CCSRBAR mapping */
>> +	mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(13);
>> +	mas1 = MAS1_VALID | MAS1_TID(0) | MAS1_TSIZE(BOOKE_PAGESZ_1M);
>> +	mas2 = FSL_BOOKE_MAS2(CONFIG_SYS_CCSRBAR, MAS2_I|MAS2_G);
>> +	mas3 = FSL_BOOKE_MAS3(CONFIG_SYS_CCSRBAR_PHYS, 0, MAS3_SW|MAS3_SR);
>> +	mas7 = FSL_BOOKE_MAS7(CONFIG_SYS_CCSRBAR_PHYS);
>> +
>> +	write_tlb(mas0, mas1, mas2, mas3, mas7);
> 
> Why is this not done via tlb_map_range (after calling
> init_used_tlb_cams, of course)?

Only because this code predates the generic tlb_map_range function. I've fixed it.

> 
>> +
>> +	/* Create a RAM map that spans all accessible RAM */
>> +	init_used_tlb_cams();
>> +	setup_ddr_tlbs(ram_size >> 20);
>> +}
>> +
>> +void init_laws(void)
>> +{
>> +	/* We don't emulate LAWs yet */
>> +}
>> +
>> +static uint32_t get_cpu_freq(void)
>> +{
>> +	void *fdt = get_fdt();
>> +	int cpus_node = fdt_path_offset(fdt, "/cpus");
>> +	int cpu_node = fdt_first_subnode(fdt, cpus_node);
>> +	const char *prop = "clock-frequency";
>> +	return fdt_getprop_u32_default_node(fdt, cpu_node, 0, prop, 0);
>> +}
>> +
>> +void get_sys_info(sys_info_t *sys_info)
>> +{
>> +	int freq = get_cpu_freq();
>> +
>> +	memset(sys_info, 0, sizeof(sys_info_t));
>> +	sys_info->freq_systembus = freq;
>> +	sys_info->freq_ddrbus = freq;
>> +	sys_info->freq_processor[0] = freq;
>> +}
>> +
> 
> Again, if you're doing this you really should override get_tbclk()
> instead of letting it use the much faster cpufreq/8.

Ok, I've made get_tbclk weak and override it with a function that reads timebase-frequency from the first cpu node in dt.

> 
>> +int get_clocks (void)
>> +{
>> +	sys_info_t sys_info;
>> +
>> +	get_sys_info(&sys_info);
>> +
>> +	gd->cpu_clk = sys_info.freq_processor[0];
>> +	gd->bus_clk = sys_info.freq_systembus;
>> +	gd->mem_clk = sys_info.freq_ddrbus;
>> +	gd->arch.lbc_clk = sys_info.freq_ddrbus;
> 
> I wonder if with higher frequencies we'll trigger overflows in some
> drivers. :-)
> 
>> +/* Physical address should be a function call */
>> +#ifndef __ASSEMBLY__
>> +extern unsigned long long get_phys_ccsrbar_addr_early(void);
>> +#endif
> 
> Where does this get called?

CONFIG_SYS_CCSRBAR_PHYS is defined to this which gets used in cpu_init_early_f().

Alex