[U-Boot] [Patch v3 2/4] ARMv8/FSL_LSCH3: Add FSL_LSCH3 SoC
Rob Herring
robherring2 at gmail.com
Thu May 29 15:19:02 CEST 2014
On Wed, May 28, 2014 at 6:46 PM, York Sun <yorksun at freescale.com> wrote:
> Freescale LayerScape with Chassis Generation 3 is a set of SoCs with
> ARMv8 cores and 3rd generation of Chassis. We use different MMU setup
> to support memory map and cache attribute for these SoCs. MMU and cache
> are enabled very early to boost performance, especially for early
> development on emulators. After u-boot relocates to DDR, a new MMU
> table with QBMan cache access is created in DDR. SMMU pagesize is set
> in SMMU_sACR register. Both DDR3 and DDR4 are supported.
>
> Signed-off-by: York Sun <yorksun at freescale.com>
> Signed-off-by: Varun Sethi <Varun.Sethi at freescale.com>
> Signed-off-by: Arnab Basu <arnab.basu at freescale.com>
> ---
> Change log:
> v3: Remove blank lines at the end of files
> Fix cluster PLL GSR register for accessing beyond array size
> Update final MMU table to support QBMan memory with cache
> Set SMMU pagesize in SMMU_sACR register in lowlevel init.
> Add DDR4 support
> Remove forcing L3 cache flushing
> Update GICv3 redistributor base address
>
> Some of these changes are caused by model change.
>
> arch/arm/cpu/armv8/cache_v8.c | 7 +-
> arch/arm/cpu/armv8/fsl-lsch3/Makefile | 10 +
> arch/arm/cpu/armv8/fsl-lsch3/README | 10 +
> arch/arm/cpu/armv8/fsl-lsch3/cpu.c | 474 +++++++++++++++++++++
> arch/arm/cpu/armv8/fsl-lsch3/cpu.h | 7 +
> arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S | 65 +++
> arch/arm/cpu/armv8/fsl-lsch3/speed.c | 176 ++++++++
> arch/arm/cpu/armv8/fsl-lsch3/speed.h | 7 +
> arch/arm/cpu/armv8/fsl-lsch3/timer.c | 62 +++
> arch/arm/include/asm/arch-fsl-lsch3/clock.h | 23 +
> arch/arm/include/asm/arch-fsl-lsch3/config.h | 65 +++
> arch/arm/include/asm/arch-fsl-lsch3/gpio.h | 9 +
> arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h | 116 +++++
> arch/arm/include/asm/arch-fsl-lsch3/imx-regs.h | 13 +
> arch/arm/include/asm/arch-fsl-lsch3/mmu.h | 10 +
> arch/arm/include/asm/config.h | 4 +
> arch/arm/include/asm/system.h | 2 +
> drivers/i2c/mxc_i2c.c | 5 +
> include/common.h | 5 +-
> 19 files changed, 1066 insertions(+), 4 deletions(-)
> create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/Makefile
> create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/README
> create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/cpu.c
> create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/cpu.h
> create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S
> create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/speed.c
> create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/speed.h
> create mode 100644 arch/arm/cpu/armv8/fsl-lsch3/timer.c
> create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/clock.h
> create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/config.h
> create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/gpio.h
> create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/immap_lsch3.h
> create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/imx-regs.h
> create mode 100644 arch/arm/include/asm/arch-fsl-lsch3/mmu.h
>
> diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
> index a96ecda..c47acba 100644
> --- a/arch/arm/cpu/armv8/cache_v8.c
> +++ b/arch/arm/cpu/armv8/cache_v8.c
> @@ -83,12 +83,17 @@ void invalidate_dcache_all(void)
> __asm_invalidate_dcache_all();
> }
>
> +void __weak flush_l3_cache(void)
> +{
> +}
> +
> /*
> * Performs a clean & invalidation of the entire data cache at all levels
> */
> void flush_dcache_all(void)
> {
> __asm_flush_dcache_all();
> + flush_l3_cache();
> }
>
> /*
> @@ -221,7 +226,7 @@ void invalidate_icache_all(void)
> * Enable dCache & iCache, whether cache is actually enabled
> * depend on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF
> */
> -void enable_caches(void)
> +void __weak enable_caches(void)
> {
> icache_enable();
> dcache_enable();
> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/Makefile b/arch/arm/cpu/armv8/fsl-lsch3/Makefile
> new file mode 100644
> index 0000000..4b859cf
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/fsl-lsch3/Makefile
> @@ -0,0 +1,10 @@
> +#
> +# Copyright 2014, Freescale Semiconductor
> +#
> +# SPDX-License-Identifier: GPL-2.0+
> +#
> +
> +obj-y += cpu.o
> +obj-y += timer.o
> +obj-y += lowlevel.o
> +obj-y += speed.o
> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/README b/arch/arm/cpu/armv8/fsl-lsch3/README
> new file mode 100644
> index 0000000..de34a91
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/fsl-lsch3/README
> @@ -0,0 +1,10 @@
> +#
> +# Copyright 2014 Freescale Semiconductor
> +#
> +# SPDX-License-Identifier: GPL-2.0+
> +#
> +
> +Freescale LayerScape with Chassis Generation 3
> +
> +This architecture supports Freescale ARMv8 SoCs with Chassis generation 3,
> +for example LS2100A.
> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.c b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c
> new file mode 100644
> index 0000000..2780390
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.c
> @@ -0,0 +1,474 @@
> +/*
> + * Copyright 2014 Freescale Semiconductor, Inc.
> + *
> + * SPDX-License-Identifier: GPL-2.0+
> + */
> +
> +#include <common.h>
> +#include <asm/io.h>
> +#include <asm/system.h>
> +#include <asm/armv8/mmu.h>
> +#include <asm/io.h>
> +#include <asm/arch-fsl-lsch3/immap_lsch3.h>
> +#include "cpu.h"
> +#include "speed.h"
> +
> +DECLARE_GLOBAL_DATA_PTR;
> +
> +#ifndef CONFIG_SYS_DCACHE_OFF
> +/*
> + * To start MMU before DDR is available, we create MMU table in SRAM.
> + * The base address of SRAM is CONFIG_SYS_FSL_OCRAM_BASE. We use three
> + * levels of translation tables here to cover 40-bit address space.
> + * We use 4KB granule size, with 40 bits physical address, T0SZ=24
> + * Level 0 IA[39], table address @0
> + * Level 1 IA[31:30], table address @0x1000, 0x2000
> + * Level 2 IA[29:21], table address @0x3000
> + */
> +
> +#define EARLY_SECTION_SHIFT_L0 39UL
> +#define EARLY_SECTION_SHIFT_L1 30UL
> +#define EARLY_SECTION_SHIFT_L2 21UL
> +#define EARLY_BLOCK_SIZE_L0 0x8000000000UL
> +#define EARLY_BLOCK_SIZE_L1 (1 << EARLY_SECTION_SHIFT_L1)
> +#define EARLY_BLOCK_SIZE_L2 (1 << EARLY_SECTION_SHIFT_L2)
> +#define CONFIG_SYS_IFC_BASE 0x30000000
> +#define CONFIG_SYS_IFC_SIZE 0x10000000
> +#define CONFIG_SYS_IFC_BASE2 0x500000000
> +#define CONFIG_SYS_IFC_SIZE2 0x100000000
> +#define TCR_EL2_PS_40BIT (2 << 16)
> +#define EARLY_VA_BITS (40)
> +#define EARLY_TCR (TCR_TG0_4K | \
> + TCR_EL2_PS_40BIT | \
> + TCR_SHARED_NON | \
> + TCR_ORGN_NC | \
> + TCR_IRGN_NC | \
> + TCR_T0SZ(EARLY_VA_BITS))
> +
> +/*
> + * Final MMU
> + * Let's start from the same layout as early MMU and modify as needed.
> + * IFC regions will be cache-inhibit.
> + */
> +#define FINAL_SECTION_SHIFT_L0 39UL
> +#define FINAL_SECTION_SHIFT_L1 30UL
> +#define FINAL_SECTION_SHIFT_L2 21UL
> +#define FINAL_BLOCK_SIZE_L0 0x8000000000UL
> +#define FINAL_BLOCK_SIZE_L1 (1 << FINAL_SECTION_SHIFT_L1)
> +#define FINAL_BLOCK_SIZE_L2 (1 << FINAL_SECTION_SHIFT_L2)
> +#define FINAL_QBMAN_CACHED_MEM 0x818000000UL
> +#define FINAL_QBMAN_CACHED_SIZE 0x4000000
> +#define TCR_EL2_PS_40BIT (2 << 16)
> +#define FINAL_VA_BITS (40)
> +#define FINAL_TCR (TCR_TG0_4K | \
> + TCR_EL2_PS_40BIT | \
> + TCR_SHARED_NON | \
> + TCR_ORGN_NC | \
> + TCR_IRGN_NC | \
> + TCR_T0SZ(FINAL_VA_BITS))
> +
> +
> +static void set_pgtable_section(u64 *page_table, u64 index, u64 section,
> + u8 memory_type)
> +{
> + u64 value;
> +
> + value = section | PMD_TYPE_SECT | PMD_SECT_AF;
> + value |= PMD_ATTRINDX(memory_type);
> + page_table[index] = value;
> +}
This function looks like it should be common.
> +
> +static inline void early_mmu_setup(void)
> +{
> + int el;
> + u64 i;
> + u64 section_l1t0, section_l1t1, section_l2;
> + u64 *level0_table = (u64 *)CONFIG_SYS_FSL_OCRAM_BASE;
> + u64 *level1_table_0 = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x1000);
> + u64 *level1_table_1 = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x2000);
> + u64 *level2_table = (u64 *)(CONFIG_SYS_FSL_OCRAM_BASE + 0x3000);
> +
> +
> + level0_table[0] =
> + (u64)level1_table_0 | PMD_TYPE_TABLE;
> + level0_table[1] =
> + (u64)level1_table_1 | PMD_TYPE_TABLE;
> +
> + /*
> + * set level 1 table 0 to cache_inhibit, covering 0 to 512GB
> + * set level 1 table 1 to cache enabled, covering 512GB to 1TB
> + * set level 2 table to cache-inhibit, covering 0 to 1GB
> + */
> + section_l1t0 = 0;
> + section_l1t1 = EARLY_BLOCK_SIZE_L0;
> + section_l2 = 0;
> + for (i = 0; i < 512; i++) {
> + set_pgtable_section(level1_table_0, i, section_l1t0,
> + MT_DEVICE_NGNRNE);
> + set_pgtable_section(level1_table_1, i, section_l1t1,
> + MT_NORMAL);
> + set_pgtable_section(level2_table, i, section_l2,
> + MT_DEVICE_NGNRNE);
> + section_l1t0 += EARLY_BLOCK_SIZE_L1;
> + section_l1t1 += EARLY_BLOCK_SIZE_L1;
> + section_l2 += EARLY_BLOCK_SIZE_L2;
> + }
> +
> + level1_table_0[0] =
> + (u64)level2_table | PMD_TYPE_TABLE;
> + level1_table_0[1] =
> + 0x40000000 | PMD_SECT_AF | PMD_TYPE_SECT |
> + PMD_ATTRINDX(MT_DEVICE_NGNRNE);
> + level1_table_0[2] =
> + 0x80000000 | PMD_SECT_AF | PMD_TYPE_SECT |
> + PMD_ATTRINDX(MT_NORMAL);
> + level1_table_0[3] =
> + 0xc0000000 | PMD_SECT_AF | PMD_TYPE_SECT |
> + PMD_ATTRINDX(MT_NORMAL);
> +
> + /* Rewrite table to enable cache */
> + set_pgtable_section(level2_table,
> + CONFIG_SYS_FSL_OCRAM_BASE >> EARLY_SECTION_SHIFT_L2,
> + CONFIG_SYS_FSL_OCRAM_BASE,
> + MT_NORMAL);
> + for (i = CONFIG_SYS_IFC_BASE >> EARLY_SECTION_SHIFT_L2;
> + i < (CONFIG_SYS_IFC_BASE + CONFIG_SYS_IFC_SIZE)
> + >> EARLY_SECTION_SHIFT_L2; i++) {
> + section_l2 = i << EARLY_SECTION_SHIFT_L2;
> + set_pgtable_section(level2_table, i,
> + section_l2, MT_NORMAL);
> + }
> +
> + el = current_el();
We really can't have u-boot running at random ELs in v8 for different
platforms. It's a mess on v7. You should never be at EL3. u-boot could
be defined to run at EL1, but then you need to be able to go back to
EL2 to boot the kernel. So really u-boot should always run at EL2
unless you are running in a VM, but that would be a different
platform.
> + if (el == 1) {
> + asm volatile("dsb sy;isb");
> + asm volatile("msr ttbr0_el1, %0"
> + : : "r" ((u64)level0_table) : "memory");
> + asm volatile("msr tcr_el1, %0"
> + : : "r" (EARLY_TCR) : "memory");
> + asm volatile("msr mair_el1, %0"
> + : : "r" (MEMORY_ATTRIBUTES) : "memory");
These should all be inline functions or macros.
> + } else if (el == 2) {
> + asm volatile("dsb sy;isb");
> + asm volatile("msr ttbr0_el2, %0"
> + : : "r" ((u64)level0_table) : "memory");
> + asm volatile("msr tcr_el2, %0"
> + : : "r" (EARLY_TCR) : "memory");
> + asm volatile("msr mair_el2, %0"
> + : : "r" (MEMORY_ATTRIBUTES) : "memory");
> + } else if (el == 3) {
> + asm volatile("dsb sy;isb");
> + asm volatile("msr ttbr0_el3, %0"
> + : : "r" ((u64)level0_table) : "memory");
> + asm volatile("msr tcr_el3, %0"
> + : : "r" (EARLY_TCR) : "memory");
> + asm volatile("msr mair_el3, %0"
> + : : "r" (MEMORY_ATTRIBUTES) : "memory");
> + } else {
> + hang();
> + }
> +
> + set_sctlr(get_sctlr() | CR_M);
> +}
> +
> +static inline void final_mmu_setup(void)
Looks like nearly the same code repeated...
> +{
> + int el;
> + u64 i, tbl_base, tbl_limit, section_base;
> + u64 section_l1t0, section_l1t1, section_l2;
> + u64 *level0_table = (u64 *)gd->arch.tlb_addr;
> + u64 *level1_table_0 = (u64 *)(gd->arch.tlb_addr + 0x1000);
> + u64 *level1_table_1 = (u64 *)(gd->arch.tlb_addr + 0x2000);
> + u64 *level2_table_0 = (u64 *)(gd->arch.tlb_addr + 0x3000);
> + u64 *level2_table_1 = (u64 *)(gd->arch.tlb_addr + 0x4000);
> +
> +
> + level0_table[0] =
> + (u64)level1_table_0 | PMD_TYPE_TABLE;
> + level0_table[1] =
> + (u64)level1_table_1 | PMD_TYPE_TABLE;
> +
> + /*
> + * set level 1 table 0 to cache_inhibit, covering 0 to 512GB
> + * set level 1 table 1 to cache enabled, covering 512GB to 1TB
> + * set level 2 table 0 to cache-inhibit, covering 0 to 1GB
> + */
> + section_l1t0 = 0;
> + section_l1t1 = FINAL_BLOCK_SIZE_L0;
> + section_l2 = 0;
> + for (i = 0; i < 512; i++) {
> + set_pgtable_section(level1_table_0, i, section_l1t0,
> + MT_DEVICE_NGNRNE);
> + set_pgtable_section(level1_table_1, i, section_l1t1,
> + MT_NORMAL);
> + set_pgtable_section(level2_table_0, i, section_l2,
> + MT_DEVICE_NGNRNE);
> + section_l1t0 += FINAL_BLOCK_SIZE_L1;
> + section_l1t1 += FINAL_BLOCK_SIZE_L1;
> + section_l2 += FINAL_BLOCK_SIZE_L2;
> + }
> +
> + level1_table_0[0] =
> + (u64)level2_table_0 | PMD_TYPE_TABLE;
> + level1_table_0[2] =
> + 0x80000000 | PMD_SECT_AF | PMD_TYPE_SECT |
> + PMD_ATTRINDX(MT_NORMAL);
> + level1_table_0[3] =
> + 0xc0000000 | PMD_SECT_AF | PMD_TYPE_SECT |
> + PMD_ATTRINDX(MT_NORMAL);
> +
> + /* Rewrite table to enable cache */
> + set_pgtable_section(level2_table_0,
> + CONFIG_SYS_FSL_OCRAM_BASE >> FINAL_SECTION_SHIFT_L2,
> + CONFIG_SYS_FSL_OCRAM_BASE,
> + MT_NORMAL);
> +
> + /*
> + * Fill in other part of tables if cache is needed
> + * If finer granularity than 1GB is needed, sub table
> + * should be created.
> + */
> + section_base = FINAL_QBMAN_CACHED_MEM & ~(FINAL_BLOCK_SIZE_L1 - 1);
> + i = section_base >> FINAL_SECTION_SHIFT_L1;
> + level1_table_0[i] = (u64)level2_table_1 | PMD_TYPE_TABLE;
> + section_l2 = section_base;
> + for (i = 0; i < 512; i++) {
> + set_pgtable_section(level2_table_1, i, section_l2,
> + MT_DEVICE_NGNRNE);
> + section_l2 += FINAL_BLOCK_SIZE_L2;
> + }
> + tbl_base = FINAL_QBMAN_CACHED_MEM & (FINAL_BLOCK_SIZE_L1 - 1);
> + tbl_limit = (FINAL_QBMAN_CACHED_MEM + FINAL_QBMAN_CACHED_SIZE) &
> + (FINAL_BLOCK_SIZE_L1 - 1);
> + for (i = tbl_base >> FINAL_SECTION_SHIFT_L2;
> + i < tbl_limit >> FINAL_SECTION_SHIFT_L2; i++) {
> + section_l2 = section_base + (i << FINAL_SECTION_SHIFT_L2);
> + set_pgtable_section(level2_table_1, i,
> + section_l2, MT_NORMAL);
> + }
> +
> + el = current_el();
> + if (el == 1) {
> + asm volatile("dsb sy;isb");
> + asm volatile("msr ttbr0_el1, %0"
> + : : "r" ((u64)level0_table) : "memory");
> + asm volatile("msr tcr_el1, %0"
> + : : "r" (FINAL_TCR) : "memory");
> + asm volatile("msr mair_el1, %0"
> + : : "r" (MEMORY_ATTRIBUTES) : "memory");
> + } else if (el == 2) {
> + asm volatile("dsb sy;isb");
> + asm volatile("msr ttbr0_el2, %0"
> + : : "r" ((u64)level0_table) : "memory");
> + asm volatile("msr tcr_el2, %0"
> + : : "r" (FINAL_TCR) : "memory");
> + asm volatile("msr mair_el2, %0"
> + : : "r" (MEMORY_ATTRIBUTES) : "memory");
> + } else if (el == 3) {
> + asm volatile("dsb sy;isb");
> + asm volatile("msr ttbr0_el3, %0"
> + : : "r" ((u64)level0_table) : "memory");
> + asm volatile("msr tcr_el3, %0"
> + : : "r" (FINAL_TCR) : "memory");
> + asm volatile("msr mair_el3, %0"
> + : : "r" (MEMORY_ATTRIBUTES) : "memory");
> + } else {
> + hang();
> + }
> +
> + set_sctlr(get_sctlr() | CR_M);
> +}
> +
> +int arch_cpu_init(void)
> +{
> + icache_enable();
> + __asm_invalidate_dcache_all();
> + __asm_invalidate_tlb_all();
> + early_mmu_setup();
> + set_sctlr(get_sctlr() | CR_C);
> + return 0;
> +}
> +
> +/*
> + * flush_l3_cache
> + * Dickens L3 cache can be flushed by transitioning from FAM to SFONLY power
> + * state, by writing to HP-F P-state request register.
Other SOCs will have Dickens. Are these registers FSL specific? If
not, this should be common.
Also, I believe the proper way to flush Dickens is using the
architected cache flushing method where you walk the levels out to
level 3.
> + */
> +#define HNF0_PSTATE_REQ 0x04200010
> +#define HNF1_PSTATE_REQ 0x04210010
> +#define HNF2_PSTATE_REQ 0x04220010
> +#define HNF3_PSTATE_REQ 0x04230010
> +#define HNF4_PSTATE_REQ 0x04240010
> +#define HNF5_PSTATE_REQ 0x04250010
> +#define HNF6_PSTATE_REQ 0x04260010
> +#define HNF7_PSTATE_REQ 0x04270010
> +#define HNFPSTAT_MASK (0xFFFFFFFFFFFFFFFC)
> +#define HNFPSTAT_FAM 0x3
> +#define HNFPSTAT_SFONLY 0x01
> +
> +static void hnf_pstate_req(u64 *ptr, u64 state)
> +{
> + int timeout = 1000;
> + out_le64(ptr, (in_le64(ptr) & HNFPSTAT_MASK) | (state & 0x3));
> + ptr++;
> + /* checking if the transition is completed */
> + while (timeout > 0) {
> + if (((in_le64(ptr) & 0x0c) >> 2) == (state & 0x3))
> + break;
> + udelay(100);
> + timeout--;
> + }
> +}
> +
> +void flush_l3_cache(void)
> +{
> + hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_SFONLY);
> + hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_SFONLY);
> + hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_SFONLY);
> + hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_SFONLY);
> + hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_SFONLY);
> + hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_SFONLY);
> + hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_SFONLY);
> + hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_SFONLY);
> + hnf_pstate_req((u64 *)HNF0_PSTATE_REQ, HNFPSTAT_FAM);
> + hnf_pstate_req((u64 *)HNF1_PSTATE_REQ, HNFPSTAT_FAM);
> + hnf_pstate_req((u64 *)HNF2_PSTATE_REQ, HNFPSTAT_FAM);
> + hnf_pstate_req((u64 *)HNF3_PSTATE_REQ, HNFPSTAT_FAM);
> + hnf_pstate_req((u64 *)HNF4_PSTATE_REQ, HNFPSTAT_FAM);
> + hnf_pstate_req((u64 *)HNF5_PSTATE_REQ, HNFPSTAT_FAM);
> + hnf_pstate_req((u64 *)HNF6_PSTATE_REQ, HNFPSTAT_FAM);
> + hnf_pstate_req((u64 *)HNF7_PSTATE_REQ, HNFPSTAT_FAM);
> +}
> +
> +/*
> + * This function is called from lib/board.c.
> + * It recreates MMU table in main memory. MMU and d-cache are enabled earlier.
> + * There is no need to disable d-cache for this operation.
> + */
> +void enable_caches(void)
> +{
> + final_mmu_setup();
> + flush_dcache_range(gd->arch.tlb_addr,
> + gd->arch.tlb_addr + gd->arch.tlb_size);
> + __asm_invalidate_tlb_all();
> +}
> +#endif
> +
> +static inline u32 init_type(u32 cluster, int init_id)
init_type? That's a great name.
> +{
> + struct ccsr_gur *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
> + u32 idx = (cluster >> (init_id * 8)) & TP_CLUSTER_INIT_MASK;
> + u32 type = in_le32(&gur->tp_ityp[idx]);
> +
> + if (type & TP_ITYP_AV)
> + return type;
> +
> + return 0;
> +}
> +
> +u32 cpu_mask(void)
> +{
> + struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
> + int i = 0, count = 0;
> + u32 cluster, type, mask = 0;
> +
> + do {
> + int j;
> + cluster = in_le32(&gur->tp_cluster[i].lower);
> + for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
> + type = init_type(cluster, j);
> + if (type) {
> + if (TP_ITYP_TYPE(type) == TP_ITYP_TYPE_ARM)
> + mask |= 1 << count;
> + count++;
> + }
> + }
> + i++;
> + } while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
> +
> + return mask;
> +}
> +
> +/*
> + * Return the number of cores on this SOC.
> + */
> +int cpu_numcores(void)
> +{
> + return hweight32(cpu_mask());
> +}
> +
> +int fsl_qoriq_core_to_cluster(unsigned int core)
> +{
> + struct ccsr_gur __iomem *gur =
> + (void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR);
> + int i = 0, count = 0;
> + u32 cluster;
> +
> + do {
> + int j;
> + cluster = in_le32(&gur->tp_cluster[i].lower);
> + for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
> + if (init_type(cluster, j)) {
> + if (count == core)
> + return i;
> + count++;
> + }
> + }
> + i++;
> + } while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
> +
> + return -1; /* cannot identify the cluster */
> +}
> +
> +u32 fsl_qoriq_core_to_type(unsigned int core)
> +{
> + struct ccsr_gur __iomem *gur =
> + (void __iomem *)(CONFIG_SYS_FSL_GUTS_ADDR);
> + int i = 0, count = 0;
> + u32 cluster, type;
> +
> + do {
> + int j;
> + cluster = in_le32(&gur->tp_cluster[i].lower);
> + for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
> + type = init_type(cluster, j);
> + if (type) {
> + if (count == core)
> + return type;
> + count++;
> + }
> + }
> + i++;
> + } while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
> +
> + return -1; /* cannot identify the cluster */
> +}
Do you plan on supporting PSCI? All this core and cluster stuff
belongs there.
> +
> +#ifdef CONFIG_DISPLAY_CPUINFO
> +int print_cpuinfo(void)
> +{
> + struct sys_info sysinfo;
> + char buf[32];
> + unsigned int i, core;
> + u32 type;
> +
> + get_sys_info(&sysinfo);
> + puts("Clock Configuration:");
> + for_each_cpu(i, core, cpu_numcores(), cpu_mask()) {
> + if (!(i % 3))
> + puts("\n ");
> + type = TP_ITYP_VER(fsl_qoriq_core_to_type(core));
> + printf("CPU%d(%s):%-4s MHz ", core,
> + type == TY_ITYP_VER_A7 ? "A7 " :
> + (type == TY_ITYP_VER_A53 ? "A53" :
> + (type == TY_ITYP_VER_A57 ? "A57" : " ")),
> + strmhz(buf, sysinfo.freq_processor[core]));
> + }
> + printf("\n Bus: %-4s MHz ",
> + strmhz(buf, sysinfo.freq_systembus));
> + printf("DDR: %-4s MHz", strmhz(buf, sysinfo.freq_ddrbus));
> + puts("\n");
> +
> + return 0;
> +}
> +#endif
> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/cpu.h b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h
> new file mode 100644
> index 0000000..28544d7
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/fsl-lsch3/cpu.h
> @@ -0,0 +1,7 @@
> +/*
> + * Copyright 2014, Freescale Semiconductor
> + *
> + * SPDX-License-Identifier: GPL-2.0+
> + */
> +
> +int fsl_qoriq_core_to_cluster(unsigned int core);
> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S
> new file mode 100644
> index 0000000..087d5d1
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/fsl-lsch3/lowlevel.S
> @@ -0,0 +1,65 @@
> +/*
> + * (C) Copyright 2014 Freescale Semiconductor
> + *
> + * SPDX-License-Identifier: GPL-2.0+
> + *
> + * Extracted from armv8/start.S
> + */
> +
> +#include <config.h>
> +#include <linux/linkage.h>
> +#include <asm/macro.h>
> +
> +ENTRY(lowlevel_init)
> + /* Initialize GIC Secure Bank Status */
> + mov x29, lr /* Save LR */
> +
> + /* Set the SMMU page size in the sACR register */
> + ldr x1, =SMMU_BASE
> + ldr w0, [x1, #0x10]
> + orr w0, w0, #1 << 16 /* set sACR.pagesize to indicate 64K page */
> + str w0, [x1, #0x10]
> +
> +#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3)
You can have either v2 or v3?
> + branch_if_slave x0, 1f
> + ldr x0, =GICD_BASE
> + bl gic_init_secure
> +1:
> +#if defined(CONFIG_GICV3)
> + ldr x0, =GICR_BASE
> + bl gic_init_secure_percpu
> +#elif defined(CONFIG_GICV2)
> + ldr x0, =GICD_BASE
> + ldr x1, =GICC_BASE
> + bl gic_init_secure_percpu
> +#endif
> +#endif
> +
> + branch_if_master x0, x1, 1f
> +
> + /*
> + * Slave should wait for master clearing spin table.
> + * This sync prevents slaves from observing incorrect
> + * value of spin table and jumping to wrong place.
> + */
> +#if defined(CONFIG_GICV2) || defined(CONFIG_GICV3)
> +#ifdef CONFIG_GICV2
> + ldr x0, =GICC_BASE
> +#endif
> + bl gic_wait_for_interrupt
> +#endif
> +
> + /*
> + * All processors will enter EL2 and optionally EL1.
> + */
> + bl armv8_switch_to_el2
> +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1
> + bl armv8_switch_to_el1
> +#endif
> + b 2f
This all looks like cut and paste from existing startup code. Can't
you refactor things?
> +
> +1:
> +2:
> + mov lr, x29 /* Restore LR */
> + ret
> +ENDPROC(lowlevel_init)
> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/speed.c b/arch/arm/cpu/armv8/fsl-lsch3/speed.c
> new file mode 100644
> index 0000000..dc4a34b
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/fsl-lsch3/speed.c
> @@ -0,0 +1,176 @@
> +/*
> + * Copyright 2014, Freescale Semiconductor, Inc.
> + *
> + * SPDX-License-Identifier: GPL-2.0+
> + *
> + * Derived from arch/powerpc/cpu/mpc85xx/speed.c
> + */
> +
> +#include <common.h>
> +#include <linux/compiler.h>
> +#include <fsl_ifc.h>
> +#include <asm/processor.h>
> +#include <asm/io.h>
> +#include <asm/arch-fsl-lsch3/immap_lsch3.h>
> +#include <asm/arch/clock.h>
> +#include "cpu.h"
> +
> +DECLARE_GLOBAL_DATA_PTR;
> +
> +#ifndef CONFIG_SYS_FSL_NUM_CC_PLLS
> +#define CONFIG_SYS_FSL_NUM_CC_PLLS 6
> +#endif
> +
> +
> +void get_sys_info(struct sys_info *sys_info)
> +{
> + struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
> +#ifdef CONFIG_FSL_IFC
> + struct fsl_ifc *ifc_regs = (void *)CONFIG_SYS_IFC_ADDR;
> + u32 ccr;
> +#endif
> + struct ccsr_clk_cluster_group __iomem *clk_grp[2] = {
> + (void *)(CONFIG_SYS_FSL_CH3_CLK_GRPA_ADDR),
> + (void *)(CONFIG_SYS_FSL_CH3_CLK_GRPB_ADDR)
> + };
> + struct ccsr_clk_ctrl __iomem *clk_ctrl =
> + (void *)(CONFIG_SYS_FSL_CH3_CLK_CTRL_ADDR);
> + unsigned int cpu;
> + const u8 core_cplx_pll[16] = {
> + [0] = 0, /* CC1 PPL / 1 */
> + [1] = 0, /* CC1 PPL / 2 */
> + [2] = 0, /* CC1 PPL / 4 */
> + [4] = 1, /* CC2 PPL / 1 */
> + [5] = 1, /* CC2 PPL / 2 */
> + [6] = 1, /* CC2 PPL / 4 */
> + [8] = 2, /* CC3 PPL / 1 */
> + [9] = 2, /* CC3 PPL / 2 */
> + [10] = 2, /* CC3 PPL / 4 */
> + [12] = 3, /* CC4 PPL / 1 */
> + [13] = 3, /* CC4 PPL / 2 */
> + [14] = 3, /* CC4 PPL / 4 */
> + };
> +
> + const u8 core_cplx_pll_div[16] = {
> + [0] = 1, /* CC1 PPL / 1 */
> + [1] = 2, /* CC1 PPL / 2 */
> + [2] = 4, /* CC1 PPL / 4 */
> + [4] = 1, /* CC2 PPL / 1 */
> + [5] = 2, /* CC2 PPL / 2 */
> + [6] = 4, /* CC2 PPL / 4 */
> + [8] = 1, /* CC3 PPL / 1 */
> + [9] = 2, /* CC3 PPL / 2 */
> + [10] = 4, /* CC3 PPL / 4 */
> + [12] = 1, /* CC4 PPL / 1 */
> + [13] = 2, /* CC4 PPL / 2 */
> + [14] = 4, /* CC4 PPL / 4 */
> + };
> +
> + uint i, cluster;
> + uint freq_c_pll[CONFIG_SYS_FSL_NUM_CC_PLLS];
> + uint ratio[CONFIG_SYS_FSL_NUM_CC_PLLS];
> + unsigned long sysclk = CONFIG_SYS_CLK_FREQ;
> + int cc_group[12] = CONFIG_SYS_FSL_CLUSTER_CLOCKS;
> + u32 c_pll_sel, cplx_pll;
> + void *offset;
> +
> + sys_info->freq_systembus = sysclk;
> +#ifdef CONFIG_DDR_CLK_FREQ
> + sys_info->freq_ddrbus = CONFIG_DDR_CLK_FREQ;
> +#else
> + sys_info->freq_ddrbus = sysclk;
> +#endif
> +
> + sys_info->freq_systembus *= (in_le32(&gur->rcwsr[0]) >>
> + FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_SHIFT) &
> + FSL_CHASSIS3_RCWSR0_SYS_PLL_RAT_MASK;
> + sys_info->freq_ddrbus *= (in_le32(&gur->rcwsr[0]) >>
> + FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_SHIFT) &
> + FSL_CHASSIS3_RCWSR0_MEM_PLL_RAT_MASK;
> +
> + for (i = 0; i < CONFIG_SYS_FSL_NUM_CC_PLLS; i++) {
> + /*
> + * fixme: prefer to combine the following into one line, but
> + * cannot pass compiling without warning about in_le32.
> + */
> + offset = (void *)((size_t)clk_grp[i/3] +
> + offsetof(struct ccsr_clk_cluster_group,
> + pllngsr[i%3].gsr));
> + ratio[i] = (in_le32(offset) >> 1) & 0x3f;
> + if (ratio[i] > 4)
> + freq_c_pll[i] = sysclk * ratio[i];
> + else
> + freq_c_pll[i] = sys_info->freq_systembus * ratio[i];
> + }
> +
> + for_each_cpu(i, cpu, cpu_numcores(), cpu_mask()) {
> + cluster = fsl_qoriq_core_to_cluster(cpu);
> + c_pll_sel = (in_le32(&clk_ctrl->clkcncsr[cluster].csr) >> 27)
> + & 0xf;
> + cplx_pll = core_cplx_pll[c_pll_sel];
> + cplx_pll += cc_group[cluster] - 1;
> + sys_info->freq_processor[cpu] =
> + freq_c_pll[cplx_pll] / core_cplx_pll_div[c_pll_sel];
> + }
> +
> +#if defined(CONFIG_FSL_IFC)
> + ccr = in_le32(&ifc_regs->ifc_ccr);
> + ccr = ((ccr & IFC_CCR_CLK_DIV_MASK) >> IFC_CCR_CLK_DIV_SHIFT) + 1;
> +
> + sys_info->freq_localbus = sys_info->freq_systembus / ccr;
> +#endif
> +}
> +
> +
> +int get_clocks(void)
> +{
> + struct sys_info sys_info;
> + get_sys_info(&sys_info);
> + gd->cpu_clk = sys_info.freq_processor[0];
> + gd->bus_clk = sys_info.freq_systembus;
> + gd->mem_clk = sys_info.freq_ddrbus;
> +
> +#if defined(CONFIG_FSL_ESDHC)
> + gd->arch.sdhc_clk = gd->bus_clk / 2;
> +#endif /* defined(CONFIG_FSL_ESDHC) */
> +
> + if (gd->cpu_clk != 0)
> + return 0;
> + else
> + return 1;
> +}
> +
> +/********************************************
> + * get_bus_freq
> + * return system bus freq in Hz
> + *********************************************/
> +ulong get_bus_freq(ulong dummy)
> +{
> + if (!gd->bus_clk)
> + get_clocks();
> +
> + return gd->bus_clk;
> +}
> +
> +/********************************************
> + * get_ddr_freq
> + * return ddr bus freq in Hz
> + *********************************************/
> +ulong get_ddr_freq(ulong dummy)
> +{
> + if (!gd->mem_clk)
> + get_clocks();
> +
> + return gd->mem_clk;
> +}
> +
> +unsigned int mxc_get_clock(enum mxc_clock clk)
> +{
> + switch (clk) {
> + case MXC_I2C_CLK:
> + return get_bus_freq(0) / 2;
> + default:
> + printf("Unsupported clock\n");
> + }
> + return 0;
> +}
> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/speed.h b/arch/arm/cpu/armv8/fsl-lsch3/speed.h
> new file mode 100644
> index 0000000..15af5b9
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/fsl-lsch3/speed.h
> @@ -0,0 +1,7 @@
> +/*
> + * Copyright 2014, Freescale Semiconductor, Inc.
> + *
> + * SPDX-License-Identifier: GPL-2.0+
> + */
> +
> +void get_sys_info(struct sys_info *sys_info);
> diff --git a/arch/arm/cpu/armv8/fsl-lsch3/timer.c b/arch/arm/cpu/armv8/fsl-lsch3/timer.c
> new file mode 100644
> index 0000000..3adfa41
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/fsl-lsch3/timer.c
> @@ -0,0 +1,62 @@
> +/*
> + * Copyright 2014, Freescale Semiconductor
> + *
> + * SPDX-License-Identifier: GPL-2.0+
> + */
> +
> +#include <common.h>
> +#include <div64.h>
> +#include <linux/compiler.h>
> +
> +static inline u64 get_cntfrq(void)
> +{
> + u64 cntfrq;
> + asm volatile("mrs %0, cntfrq_el0" : "=r" (cntfrq));
> + return cntfrq;
> +}
> +
> +static inline u64 tick_to_time(u64 tick)
> +{
> + tick *= CONFIG_SYS_HZ;
> + do_div(tick, get_cntfrq());
> + return tick;
> +}
> +
> +static inline u64 time_to_tick(u64 time)
> +{
> + time *= get_cntfrq();
> + do_div(time, CONFIG_SYS_HZ);
> + return time;
> +}
> +
> +static inline u64 us_to_tick(unsigned long long usec)
> +{
> + usec = usec * get_cntfrq() + 999999;
> + do_div(usec, 1000000);
> +
> + return usec;
> +}
> +
> +u64 get_ticks(void)
> +{
> + u64 cval;
> +
> + asm volatile("isb;mrs %0, cntpct_el0" : "=r" (cval));
> +
> + return cval;
> +}
> +
> +ulong get_timer(ulong base)
> +{
> + return tick_to_time(get_ticks()) - base;
> +}
> +
> +void __udelay(unsigned long usec)
> +{
> + u64 start, tmo;
> +
> + start = get_ticks();
> + tmo = us_to_tick(usec);
> + while (get_ticks() < (start + tmo))
> + ;
> +}
What's wrong with the existing arch timer code?
Rob
More information about the U-Boot
mailing list