[PATCH] arch: arm: recode the initialization of GICv3 ITS Re-Distributor tables
Z.Q. Hou
zhiqiang.hou at nxp.com
Mon Feb 28 16:27:11 CET 2022
Hi Marc,
Thanks a lot for your comments!
> -----Original Message-----
> From: Marc Zyngier <maz at kernel.org>
> Sent: 2022年2月26日 19:10
> To: Z.Q. Hou <zhiqiang.hou at nxp.com>
> Cc: u-boot at lists.denx.de; sjg at chromium.org;
> bharat.gooty at broadcom.com; Priyanka Jain <priyanka.jain at nxp.com>;
> michael at walle.cc
> Subject: Re: [PATCH] arch: arm: recode the initialization of GICv3 ITS
> Re-Distributor tables
>
> On Fri, 25 Feb 2022 13:41:06 +0000,
> Zhiqiang Hou <Zhiqiang.Hou at nxp.com> wrote:
> >
> > From: Hou Zhiqiang <Zhiqiang.Hou at nxp.com>
> >
> > The current implementation needs the caller provides the memory region
> > for the property and pending tables and the number of re-distibutor,
> > and it doesn't handle the address alignment of the tables and doesn't
> > help to add the reserved-memory node for the tables.
> >
> > This patch change to use the device tree blob as argument and deal
> > with the aboves in the internal of this helper to make it easier to use.
> >
> > Signed-off-by: Hou Zhiqiang <Zhiqiang.Hou at nxp.com>
> > ---
> > arch/arm/Kconfig | 1 -
> > arch/arm/cpu/armv8/fsl-layerscape/fdt.c | 4 +-
> > arch/arm/cpu/armv8/fsl-layerscape/soc.c | 46 +-------
> > arch/arm/include/asm/gic-v3.h | 4 +-
> > arch/arm/lib/gic-v3-its.c | 142 ++++++++++--------------
> > 5 files changed, 62 insertions(+), 135 deletions(-)
> >
> > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index
> > 391a77c2b4..0f6a32b428 100644
> > --- a/arch/arm/Kconfig
> > +++ b/arch/arm/Kconfig
> > @@ -82,7 +82,6 @@ config GICV3
> >
> > config GIC_V3_ITS
> > bool "ARM GICV3 ITS"
> > - select IRQ
> > help
> > ARM GICV3 Interrupt translation service (ITS).
> > Basic support for programming locality specific peripheral diff
> > --git a/arch/arm/cpu/armv8/fsl-layerscape/fdt.c
> > b/arch/arm/cpu/armv8/fsl-layerscape/fdt.c
> > index 2fa7ebf163..10cb675fae 100644
> > --- a/arch/arm/cpu/armv8/fsl-layerscape/fdt.c
> > +++ b/arch/arm/cpu/armv8/fsl-layerscape/fdt.c
> > @@ -1,7 +1,7 @@
> > // SPDX-License-Identifier: GPL-2.0+
> > /*
> > * Copyright 2014-2015 Freescale Semiconductor, Inc.
> > - * Copyright 2020-2021 NXP
> > + * Copyright 2020-2022 NXP
>
> Really? Isn't that what the git log is for?
This is required by NXP policy.
>
> > */
> >
> > #include <common.h>
> > @@ -653,7 +653,7 @@ void ft_cpu_setup(void *blob, struct bd_info *bd)
> > get_board_sys_clk(), 1);
> >
> > #ifdef CONFIG_GIC_V3_ITS
> > - ls_gic_rd_tables_init(blob);
> > + gic_lpi_tables_init(blob);
> > #endif
>
> gic_lpi_tables_init() already has a definition when CONFIG_GIC_V3_ITS isn't
> selected. Why the #ifdef-ery?
I'll remove it in next version.
>
> >
> > #if defined(CONFIG_PCIE_LAYERSCAPE) ||
> > defined(CONFIG_PCIE_LAYERSCAPE_GEN4)
> > diff --git a/arch/arm/cpu/armv8/fsl-layerscape/soc.c
> > b/arch/arm/cpu/armv8/fsl-layerscape/soc.c
> > index d3a5cfaac1..51ed942f57 100644
> > --- a/arch/arm/cpu/armv8/fsl-layerscape/soc.c
> > +++ b/arch/arm/cpu/armv8/fsl-layerscape/soc.c
> > @@ -1,17 +1,15 @@
> > // SPDX-License-Identifier: GPL-2.0+
> > /*
> > * Copyright 2014-2015 Freescale Semiconductor
> > - * Copyright 2019-2021 NXP
> > + * Copyright 2019-2022 NXP
> > */
> >
> > #include <common.h>
> > #include <clock_legacy.h>
> > -#include <cpu_func.h>
> > #include <env.h>
> > #include <fsl_immap.h>
> > #include <fsl_ifc.h>
> > #include <init.h>
> > -#include <linux/sizes.h>
> > #include <log.h>
> > #include <asm/arch/fsl_serdes.h>
> > #include <asm/arch/soc.h>
> > @@ -21,7 +19,6 @@
> > #include <asm/arch-fsl-layerscape/config.h>
> > #include <asm/arch-fsl-layerscape/ns_access.h>
> > #include <asm/arch-fsl-layerscape/fsl_icid.h>
> > -#include <asm/gic-v3.h>
> > #ifdef CONFIG_LAYERSCAPE_NS_ACCESS
> > #include <fsl_csu.h>
> > #endif
> > @@ -36,47 +33,6 @@
> > #include <dm.h>
> > #include <dm/device_compat.h>
> > #include <linux/err.h>
> > -#ifdef CONFIG_GIC_V3_ITS
> > -DECLARE_GLOBAL_DATA_PTR;
> > -#endif
> > -
> > -#ifdef CONFIG_GIC_V3_ITS
> > -#define PENDTABLE_MAX_SZ ALIGN(BIT(ITS_MAX_LPI_NRBITS), SZ_64K)
> > -#define PROPTABLE_MAX_SZ ALIGN(BIT(ITS_MAX_LPI_NRBITS) / 8,
> SZ_64K)
> > -#define GIC_LPI_SIZE ALIGN(cpu_numcores() *
> PENDTABLE_MAX_SZ + \
> > - PROPTABLE_MAX_SZ, SZ_1M)
> > -static int fdt_add_resv_mem_gic_rd_tables(void *blob, u64 base,
> > size_t size) -{
> > - int err;
> > - struct fdt_memory gic_rd_tables;
> > -
> > - gic_rd_tables.start = base;
> > - gic_rd_tables.end = base + size - 1;
> > - err = fdtdec_add_reserved_memory(blob, "gic-rd-tables",
> &gic_rd_tables,
> > - NULL, 0, NULL, 0);
> > - if (err < 0)
> > - debug("%s: failed to add reserved memory: %d\n", __func__, err);
> > -
> > - return err;
> > -}
> > -
> > -int ls_gic_rd_tables_init(void *blob) -{
> > - u64 gic_lpi_base;
> > - int ret;
> > -
> > - gic_lpi_base = ALIGN(gd->arch.resv_ram - GIC_LPI_SIZE, SZ_64K);
> > - ret = fdt_add_resv_mem_gic_rd_tables(blob, gic_lpi_base,
> GIC_LPI_SIZE);
> > - if (ret)
> > - return ret;
> > -
> > - ret = gic_lpi_tables_init(gic_lpi_base, cpu_numcores());
> > - if (ret)
> > - debug("%s: failed to init gic-lpi-tables\n", __func__);
> > -
> > - return ret;
> > -}
> > -#endif
> >
> > bool soc_has_dp_ddr(void)
> > {
> > diff --git a/arch/arm/include/asm/gic-v3.h
> > b/arch/arm/include/asm/gic-v3.h index 5131fabec4..e2e175f065 100644
> > --- a/arch/arm/include/asm/gic-v3.h
> > +++ b/arch/arm/include/asm/gic-v3.h
> > @@ -127,9 +127,9 @@
> > #define GIC_REDISTRIBUTOR_OFFSET 0x20000
> >
> > #ifdef CONFIG_GIC_V3_ITS
> > -int gic_lpi_tables_init(u64 base, u32 max_redist);
> > +int gic_lpi_tables_init(void *blob);
> > #else
> > -int gic_lpi_tables_init(u64 base, u32 max_redist)
> > +int gic_lpi_tables_init(void *blob);
> > {
> > return 0;
> > }
>
> You obviously haven't compiled this. And this should be made inline.
>
Thanks for the catch! Will remove the wrongly added ';' and change it to inline.
> > diff --git a/arch/arm/lib/gic-v3-its.c b/arch/arm/lib/gic-v3-its.c
> > index f6211a2d92..3ef1e74954 100644
> > --- a/arch/arm/lib/gic-v3-its.c
> > +++ b/arch/arm/lib/gic-v3-its.c
> > @@ -1,92 +1,65 @@
> > // SPDX-License-Identifier: GPL-2.0+
> > /*
> > * Copyright 2019 Broadcom.
> > + * Copyright 2022 NXP
>
> You got to love all these copyright attribution for nothing...
>
> > */
> > +
> > #include <common.h>
> > #include <cpu_func.h>
> > -#include <dm.h>
> > +#include <fdtdec.h>
> > #include <asm/gic.h>
> > #include <asm/gic-v3.h>
> > #include <asm/io.h>
> > #include <linux/bitops.h>
> > #include <linux/sizes.h>
> > +#include <memalign.h>
> >
> > static u32 lpi_id_bits;
> >
> > #define LPI_NRBITS lpi_id_bits
> > -#define LPI_PROPBASE_SZ ALIGN(BIT(LPI_NRBITS), SZ_64K)
> > +#define LPI_PROPBASE_SZ BIT(LPI_NRBITS)
> > #define LPI_PENDBASE_SZ ALIGN(BIT(LPI_NRBITS) / 8, SZ_64K)
> >
> > -/*
> > - * gic_v3_its_priv - gic details
> > - *
> > - * @gicd_base: gicd base address
> > - * @gicr_base: gicr base address
> > - */
> > -struct gic_v3_its_priv {
> > - ulong gicd_base;
> > - ulong gicr_base;
> > -};
> > -
> > -static int gic_v3_its_get_gic_addr(struct gic_v3_its_priv *priv) -{
> > - struct udevice *dev;
> > - fdt_addr_t addr;
> > - int ret;
> > -
> > - ret = uclass_get_device_by_driver(UCLASS_IRQ,
> > - DM_DRIVER_GET(arm_gic_v3_its), &dev);
> > - if (ret) {
> > - pr_err("%s: failed to get %s irq device\n", __func__,
> > - DM_DRIVER_GET(arm_gic_v3_its)->name);
> > - return ret;
> > - }
> > -
> > - addr = dev_read_addr_index(dev, 0);
> > - if (addr == FDT_ADDR_T_NONE) {
> > - pr_err("%s: failed to get GICD address\n", __func__);
> > - return -EINVAL;
> > - }
> > - priv->gicd_base = addr;
> > -
> > - addr = dev_read_addr_index(dev, 1);
> > - if (addr == FDT_ADDR_T_NONE) {
> > - pr_err("%s: failed to get GICR address\n", __func__);
> > - return -EINVAL;
> > - }
> > - priv->gicr_base = addr;
> > -
> > - return 0;
> > -}
> > -
>
> Hold on: why is it valid to delete this code? This is obviously a change in
> behaviour affecting the Broadcom platforms. Maybe this does nothing
> useful, but you really have to explain *why*.
As the macros GICD_BASE/GICR_BASE have been used in armv8/start.S, for simplicity directly use them here.
Currently only Layerscape platform code called the function gic_lpi_tables_init(), so it doesn't affect other platforms.
>
> > /*
> > * Program the GIC LPI configuration tables for all
> > * the re-distributors and enable the LPI table
> > - * base: Configuration table address
> > - * num_redist: number of redistributors
> > + * blob: Device tree blob
> > */
> > -int gic_lpi_tables_init(u64 base, u32 num_redist)
> > +int gic_lpi_tables_init(void *blob)
> > {
> > - struct gic_v3_its_priv priv;
> > + struct fdt_memory lpi_tables;
> > + ulong pend_tab_total_sz;
> > + u64 pend_table_base;
> > + u32 num_redist = 0;
> > + void *pend_tab_va;
> > + int cpu_node = -1;
> > u32 gicd_typer;
> > - u64 val;
> > - u64 tmp;
> > - int i;
> > - u64 redist_lpi_base;
> > u64 pend_base;
> > - ulong pend_tab_total_sz = num_redist * LPI_PENDBASE_SZ;
> > - void *pend_tab_va;
> > -
> > - if (gic_v3_its_get_gic_addr(&priv))
> > - return -EINVAL;
> > + u64 val, tmp;
> > + void *base;
> > + int err, i;
> >
> > - gicd_typer = readl((uintptr_t)(priv.gicd_base + GICD_TYPER));
> > + gicd_typer = readl((uintptr_t)(GICD_BASE + GICD_TYPER));
>
> Where is GICD_BASE coming from? Are you now assuming that all platforms
> will define this constant instead of retrieving it from... or wait: the device
> tree?
Yes, I once thought getting from device tree, but as mentioned above for simplicity used GICD_BASE/GICR_BASE.
The GICD_BASE/GICR_BASE must be defined if they defined CONFIG_GICV3, otherwise the armv8/start.S will failed to build.
>
> > /* GIC support for Locality specific peripheral interrupts (LPI's) */
> > if (!(gicd_typer & GICD_TYPER_LPIS)) {
> > pr_err("GIC implementation does not support LPI's\n");
> > return -EINVAL;
> > }
> >
> > + /* lpi_id_bits to get LPI_PENDBASE_SZ and LPI_PROPBASE_SZ */
> > + lpi_id_bits = min_t(u32, GICD_TYPER_ID_BITS(gicd_typer),
> > + ITS_MAX_LPI_NRBITS);
> > +
> > + while (1) {
> > + cpu_node = fdt_node_offset_by_prop_value(blob, cpu_node,
> > + "device_type",
> > + "cpu", 4);
> > + if (cpu_node == -FDT_ERR_NOTFOUND)
> > + break;
> > +
> > + num_redist++;
> > + }
>
> NAK. The device tree binding tells you how many redistributors you have.
> Counting CPUs is wrong. Please use the binding as specified (and please
> account for the difference is size between GICv3 and GICv4).
Do you mean calculate the number of redistributors base on the properties 'redistributor-regions' and 'redistributor-stride'?
But in P26 of 'IHI0069G_gic_architecture_specification.pdf', it notes 'There is one copy of each of the Redistributor registers per PE', does it mean there are the same numbers of redistributors as the CPUs?
>
> > +
> > /*
> > * Check for LPI is disabled for all the redistributors.
> > * Once the LPI table is enabled, can not program the @@ -95,48
> > +68,58 @@ int gic_lpi_tables_init(u64 base, u32 num_redist)
> > for (i = 0; i < num_redist; i++) {
> > u32 offset = i * GIC_REDISTRIBUTOR_OFFSET;
> >
> > - if ((readl((uintptr_t)(priv.gicr_base + offset))) &
> > + if ((readl((uintptr_t)(GICR_BASE + offset))) &
>
> Oh, because you actually believe there is a *SINGLE* redistributor range?
> Time to face reality: you can have an *infinite* number of them. You need to
> handle ranges of redistributors, and each redistributor inside the range.
This patch didn't change the property/pending base initialization logic, seems it only works on single redistributor range case, it can be improved. But I doesn't find the illustration on the redistributor range in gic architecture spec, can you help point out the section?
And what do you mean of *infinite* number of redistributors, my understanding is each PE needs one redistributor connecting to its cpu interface, can you help understand what is the extra redistributors for?
>
> > GICR_CTLR_ENABLE_LPIS) {
> > - pr_err("Re-Distributor %d LPI is already enabled\n",
> > - i);
> > + pr_err("Re-Distributor %d LPI is already enabled\n", i);
> > return -EINVAL;
> > }
> > }
> >
> > - /* lpi_id_bits to get LPI_PENDBASE_SZ and LPi_PROPBASE_SZ */
> > - lpi_id_bits = min_t(u32, GICD_TYPER_ID_BITS(gicd_typer),
> > - ITS_MAX_LPI_NRBITS);
> > + pend_tab_total_sz = num_redist * LPI_PENDBASE_SZ;
> > +
> > + base = memalign(SZ_64K, LPI_PROPBASE_SZ + pend_tab_total_sz);
> > + if (!base)
> > + return -ENOMEM;
> > +
> > + lpi_tables.start = (phys_addr_t)base;
> > + lpi_tables.end = (phys_addr_t)base + LPI_PROPBASE_SZ +
> > + pend_tab_total_sz - 1;
> > + err = fdtdec_add_reserved_memory(blob, "gic-lpi-tables", &lpi_tables,
> > + NULL, 0, NULL, 0);
> > + if (err) {
> > + free(base);
> > + return err;
> > + }
> >
> > /* Set PropBase */
> > - val = (base |
> > + val = (((phys_addr_t)base + pend_tab_total_sz) |
> > GICR_PROPBASER_INNERSHAREABLE |
> > GICR_PROPBASER_RAWAWB |
> > ((LPI_NRBITS - 1) & GICR_PROPBASER_IDBITS_MASK));
> >
> > - writeq(val, (uintptr_t)(priv.gicr_base + GICR_PROPBASER));
> > - tmp = readl((uintptr_t)(priv.gicr_base + GICR_PROPBASER));
> > + writeq(val, (uintptr_t)(GICR_BASE + GICR_PROPBASER));
> > + tmp = readl((uintptr_t)(GICR_BASE + GICR_PROPBASER));
>
> This is also broken. The register exist *on each redistributor*. The fact that
> some implementations allow you to only write it once in IMPDEF, and you
> can't rely on it. Think of a multi-socket system. How do you expect this to
> work?
Agree that the current code rely on IMPDEF and it can be improved.
For the multi-socket system, it is out of my imagination, can you help to explain how GICv3 implementation works on this kind of system? Thanks!
Thanks,
Zhiqiang
>
> > if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) {
> > if (!(tmp & GICR_PROPBASER_SHAREABILITY_MASK)) {
> > val &= ~(GICR_PROPBASER_SHAREABILITY_MASK |
> > GICR_PROPBASER_CACHEABILITY_MASK);
> > val |= GICR_PROPBASER_NC;
> > - writeq(val,
> > - (uintptr_t)(priv.gicr_base + GICR_PROPBASER));
> > + writeq(val, (uintptr_t)(GICR_BASE + GICR_PROPBASER));
> > }
> > }
> >
> > - redist_lpi_base = base + LPI_PROPBASE_SZ;
> > - pend_tab_va = map_physmem(redist_lpi_base, pend_tab_total_sz,
> > + pend_table_base = (phys_addr_t)base;
> > + pend_tab_va = map_physmem(pend_table_base, pend_tab_total_sz,
> > MAP_NOCACHE);
> > memset(pend_tab_va, 0, pend_tab_total_sz);
> > flush_cache((ulong)pend_tab_va, pend_tab_total_sz);
> > unmap_physmem(pend_tab_va, MAP_NOCACHE);
> >
> > - pend_base = priv.gicr_base + GICR_PENDBASER;
> > + pend_base = GICR_BASE + GICR_PENDBASER;
> > for (i = 0; i < num_redist; i++) {
> > u32 offset = i * GIC_REDISTRIBUTOR_OFFSET;
> >
> > - val = ((redist_lpi_base + (i * LPI_PENDBASE_SZ)) |
> > + val = ((pend_table_base + (i * LPI_PENDBASE_SZ)) |
> > GICR_PENDBASER_INNERSHAREABLE |
> > GICR_PENDBASER_RAWAWB |
> > GICR_PENDBASER_PTZ);
> > @@ -152,19 +135,8 @@ int gic_lpi_tables_init(u64 base, u32 num_redist)
> >
> > /* Enable LPI for the redistributor */
> > writel(GICR_CTLR_ENABLE_LPIS,
> > - (uintptr_t)(priv.gicr_base + offset));
> > + (uintptr_t)(GICR_BASE + offset));
> > }
>
> All of this is equally broken for the reasons mentioned above.
>
> >
> > return 0;
> > }
> > -
> > -static const struct udevice_id gic_v3_its_ids[] = {
> > - { .compatible = "arm,gic-v3" },
> > - {}
> > -};
> > -
> > -U_BOOT_DRIVER(arm_gic_v3_its) = {
> > - .name = "gic-v3",
> > - .id = UCLASS_IRQ,
> > - .of_match = gic_v3_its_ids,
> > -};
>
> And this smells like an awful regression.
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
More information about the U-Boot
mailing list