[PATCH] arch: arm: recode the initialization of GICv3 ITS Re-Distributor tables

Z.Q. Hou zhiqiang.hou at nxp.com
Mon Feb 28 16:27:11 CET 2022


Hi Marc,

Thanks a lot for your comments!

> -----Original Message-----
> From: Marc Zyngier <maz at kernel.org>
> Sent: 2022年2月26日 19:10
> To: Z.Q. Hou <zhiqiang.hou at nxp.com>
> Cc: u-boot at lists.denx.de; sjg at chromium.org;
> bharat.gooty at broadcom.com; Priyanka Jain <priyanka.jain at nxp.com>;
> michael at walle.cc
> Subject: Re: [PATCH] arch: arm: recode the initialization of GICv3 ITS
> Re-Distributor tables
> 
> On Fri, 25 Feb 2022 13:41:06 +0000,
> Zhiqiang Hou <Zhiqiang.Hou at nxp.com> wrote:
> >
> > From: Hou Zhiqiang <Zhiqiang.Hou at nxp.com>
> >
> > The current implementation needs the caller provides the memory region
> > for the property and pending tables and the number of re-distibutor,
> > and it doesn't handle the address alignment of the tables and doesn't
> > help to add the reserved-memory node for the tables.
> >
> > This patch change to use the device tree blob as argument and deal
> > with the aboves in the internal of this helper to make it easier to use.
> >
> > Signed-off-by: Hou Zhiqiang <Zhiqiang.Hou at nxp.com>
> > ---
> >  arch/arm/Kconfig                        |   1 -
> >  arch/arm/cpu/armv8/fsl-layerscape/fdt.c |   4 +-
> >  arch/arm/cpu/armv8/fsl-layerscape/soc.c |  46 +-------
> >  arch/arm/include/asm/gic-v3.h           |   4 +-
> >  arch/arm/lib/gic-v3-its.c               | 142 ++++++++++--------------
> >  5 files changed, 62 insertions(+), 135 deletions(-)
> >
> > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index
> > 391a77c2b4..0f6a32b428 100644
> > --- a/arch/arm/Kconfig
> > +++ b/arch/arm/Kconfig
> > @@ -82,7 +82,6 @@ config GICV3
> >
> >  config GIC_V3_ITS
> >  	bool "ARM GICV3 ITS"
> > -	select IRQ
> >  	help
> >  	  ARM GICV3 Interrupt translation service (ITS).
> >  	  Basic support for programming locality specific peripheral diff
> > --git a/arch/arm/cpu/armv8/fsl-layerscape/fdt.c
> > b/arch/arm/cpu/armv8/fsl-layerscape/fdt.c
> > index 2fa7ebf163..10cb675fae 100644
> > --- a/arch/arm/cpu/armv8/fsl-layerscape/fdt.c
> > +++ b/arch/arm/cpu/armv8/fsl-layerscape/fdt.c
> > @@ -1,7 +1,7 @@
> >  // SPDX-License-Identifier: GPL-2.0+
> >  /*
> >   * Copyright 2014-2015 Freescale Semiconductor, Inc.
> > - * Copyright 2020-2021 NXP
> > + * Copyright 2020-2022 NXP
> 
> Really? Isn't that what the git log is for?

This is required by NXP policy.

> 
> >   */
> >
> >  #include <common.h>
> > @@ -653,7 +653,7 @@ void ft_cpu_setup(void *blob, struct bd_info *bd)
> >  			     get_board_sys_clk(), 1);
> >
> >  #ifdef CONFIG_GIC_V3_ITS
> > -	ls_gic_rd_tables_init(blob);
> > +	gic_lpi_tables_init(blob);
> >  #endif
> 
> gic_lpi_tables_init() already has a definition when CONFIG_GIC_V3_ITS isn't
> selected. Why the #ifdef-ery?

I'll remove it in next version.

> 
> >
> >  #if defined(CONFIG_PCIE_LAYERSCAPE) ||
> > defined(CONFIG_PCIE_LAYERSCAPE_GEN4)
> > diff --git a/arch/arm/cpu/armv8/fsl-layerscape/soc.c
> > b/arch/arm/cpu/armv8/fsl-layerscape/soc.c
> > index d3a5cfaac1..51ed942f57 100644
> > --- a/arch/arm/cpu/armv8/fsl-layerscape/soc.c
> > +++ b/arch/arm/cpu/armv8/fsl-layerscape/soc.c
> > @@ -1,17 +1,15 @@
> >  // SPDX-License-Identifier: GPL-2.0+
> >  /*
> >   * Copyright 2014-2015 Freescale Semiconductor
> > - * Copyright 2019-2021 NXP
> > + * Copyright 2019-2022 NXP
> >   */
> >
> >  #include <common.h>
> >  #include <clock_legacy.h>
> > -#include <cpu_func.h>
> >  #include <env.h>
> >  #include <fsl_immap.h>
> >  #include <fsl_ifc.h>
> >  #include <init.h>
> > -#include <linux/sizes.h>
> >  #include <log.h>
> >  #include <asm/arch/fsl_serdes.h>
> >  #include <asm/arch/soc.h>
> > @@ -21,7 +19,6 @@
> >  #include <asm/arch-fsl-layerscape/config.h>
> >  #include <asm/arch-fsl-layerscape/ns_access.h>
> >  #include <asm/arch-fsl-layerscape/fsl_icid.h>
> > -#include <asm/gic-v3.h>
> >  #ifdef CONFIG_LAYERSCAPE_NS_ACCESS
> >  #include <fsl_csu.h>
> >  #endif
> > @@ -36,47 +33,6 @@
> >  #include <dm.h>
> >  #include <dm/device_compat.h>
> >  #include <linux/err.h>
> > -#ifdef CONFIG_GIC_V3_ITS
> > -DECLARE_GLOBAL_DATA_PTR;
> > -#endif
> > -
> > -#ifdef CONFIG_GIC_V3_ITS
> > -#define PENDTABLE_MAX_SZ	ALIGN(BIT(ITS_MAX_LPI_NRBITS), SZ_64K)
> > -#define PROPTABLE_MAX_SZ	ALIGN(BIT(ITS_MAX_LPI_NRBITS) / 8,
> SZ_64K)
> > -#define GIC_LPI_SIZE		ALIGN(cpu_numcores() *
> PENDTABLE_MAX_SZ + \
> > -				PROPTABLE_MAX_SZ, SZ_1M)
> > -static int fdt_add_resv_mem_gic_rd_tables(void *blob, u64 base,
> > size_t size) -{
> > -	int err;
> > -	struct fdt_memory gic_rd_tables;
> > -
> > -	gic_rd_tables.start = base;
> > -	gic_rd_tables.end = base + size - 1;
> > -	err = fdtdec_add_reserved_memory(blob, "gic-rd-tables",
> &gic_rd_tables,
> > -					 NULL, 0, NULL, 0);
> > -	if (err < 0)
> > -		debug("%s: failed to add reserved memory: %d\n", __func__, err);
> > -
> > -	return err;
> > -}
> > -
> > -int ls_gic_rd_tables_init(void *blob) -{
> > -	u64 gic_lpi_base;
> > -	int ret;
> > -
> > -	gic_lpi_base = ALIGN(gd->arch.resv_ram - GIC_LPI_SIZE, SZ_64K);
> > -	ret = fdt_add_resv_mem_gic_rd_tables(blob, gic_lpi_base,
> GIC_LPI_SIZE);
> > -	if (ret)
> > -		return ret;
> > -
> > -	ret = gic_lpi_tables_init(gic_lpi_base, cpu_numcores());
> > -	if (ret)
> > -		debug("%s: failed to init gic-lpi-tables\n", __func__);
> > -
> > -	return ret;
> > -}
> > -#endif
> >
> >  bool soc_has_dp_ddr(void)
> >  {
> > diff --git a/arch/arm/include/asm/gic-v3.h
> > b/arch/arm/include/asm/gic-v3.h index 5131fabec4..e2e175f065 100644
> > --- a/arch/arm/include/asm/gic-v3.h
> > +++ b/arch/arm/include/asm/gic-v3.h
> > @@ -127,9 +127,9 @@
> >  #define GIC_REDISTRIBUTOR_OFFSET 0x20000
> >
> >  #ifdef CONFIG_GIC_V3_ITS
> > -int gic_lpi_tables_init(u64 base, u32 max_redist);
> > +int gic_lpi_tables_init(void *blob);
> >  #else
> > -int gic_lpi_tables_init(u64 base, u32 max_redist)
> > +int gic_lpi_tables_init(void *blob);
> >  {
> >  	return 0;
> >  }
> 
> You obviously haven't compiled this. And this should be made inline.
> 
 
Thanks for the catch! Will remove the wrongly added ';' and change it to inline.

> > diff --git a/arch/arm/lib/gic-v3-its.c b/arch/arm/lib/gic-v3-its.c
> > index f6211a2d92..3ef1e74954 100644
> > --- a/arch/arm/lib/gic-v3-its.c
> > +++ b/arch/arm/lib/gic-v3-its.c
> > @@ -1,92 +1,65 @@
> >  // SPDX-License-Identifier: GPL-2.0+
> >  /*
> >   * Copyright 2019 Broadcom.
> > + * Copyright 2022 NXP
> 
> You got to love all these copyright attribution for nothing...
> 
> >   */
> > +
> >  #include <common.h>
> >  #include <cpu_func.h>
> > -#include <dm.h>
> > +#include <fdtdec.h>
> >  #include <asm/gic.h>
> >  #include <asm/gic-v3.h>
> >  #include <asm/io.h>
> >  #include <linux/bitops.h>
> >  #include <linux/sizes.h>
> > +#include <memalign.h>
> >
> >  static u32 lpi_id_bits;
> >
> >  #define LPI_NRBITS		lpi_id_bits
> > -#define LPI_PROPBASE_SZ		ALIGN(BIT(LPI_NRBITS), SZ_64K)
> > +#define LPI_PROPBASE_SZ		BIT(LPI_NRBITS)
> >  #define LPI_PENDBASE_SZ		ALIGN(BIT(LPI_NRBITS) / 8, SZ_64K)
> >
> > -/*
> > - * gic_v3_its_priv - gic details
> > - *
> > - * @gicd_base: gicd base address
> > - * @gicr_base: gicr base address
> > - */
> > -struct gic_v3_its_priv {
> > -	ulong gicd_base;
> > -	ulong gicr_base;
> > -};
> > -
> > -static int gic_v3_its_get_gic_addr(struct gic_v3_its_priv *priv) -{
> > -	struct udevice *dev;
> > -	fdt_addr_t addr;
> > -	int ret;
> > -
> > -	ret = uclass_get_device_by_driver(UCLASS_IRQ,
> > -					  DM_DRIVER_GET(arm_gic_v3_its), &dev);
> > -	if (ret) {
> > -		pr_err("%s: failed to get %s irq device\n", __func__,
> > -		       DM_DRIVER_GET(arm_gic_v3_its)->name);
> > -		return ret;
> > -	}
> > -
> > -	addr = dev_read_addr_index(dev, 0);
> > -	if (addr == FDT_ADDR_T_NONE) {
> > -		pr_err("%s: failed to get GICD address\n", __func__);
> > -		return -EINVAL;
> > -	}
> > -	priv->gicd_base = addr;
> > -
> > -	addr = dev_read_addr_index(dev, 1);
> > -	if (addr == FDT_ADDR_T_NONE) {
> > -		pr_err("%s: failed to get GICR address\n", __func__);
> > -		return -EINVAL;
> > -	}
> > -	priv->gicr_base = addr;
> > -
> > -	return 0;
> > -}
> > -
> 
> Hold on: why is it valid to delete this code? This is obviously a change in
> behaviour affecting the Broadcom platforms. Maybe this does nothing
> useful, but you really have to explain *why*.

As the macros GICD_BASE/GICR_BASE have been used in armv8/start.S, for simplicity directly use them here.
Currently only Layerscape platform code called the function gic_lpi_tables_init(), so it doesn't affect other platforms.


> 
> >  /*
> >   * Program the GIC LPI configuration tables for all
> >   * the re-distributors and enable the LPI table
> > - * base: Configuration table address
> > - * num_redist: number of redistributors
> > + * blob: Device tree blob
> >   */
> > -int gic_lpi_tables_init(u64 base, u32 num_redist)
> > +int gic_lpi_tables_init(void *blob)
> >  {
> > -	struct gic_v3_its_priv priv;
> > +	struct fdt_memory lpi_tables;
> > +	ulong pend_tab_total_sz;
> > +	u64 pend_table_base;
> > +	u32 num_redist = 0;
> > +	void *pend_tab_va;
> > +	int cpu_node = -1;
> >  	u32 gicd_typer;
> > -	u64 val;
> > -	u64 tmp;
> > -	int i;
> > -	u64 redist_lpi_base;
> >  	u64 pend_base;
> > -	ulong pend_tab_total_sz = num_redist * LPI_PENDBASE_SZ;
> > -	void *pend_tab_va;
> > -
> > -	if (gic_v3_its_get_gic_addr(&priv))
> > -		return -EINVAL;
> > +	u64 val, tmp;
> > +	void *base;
> > +	int err, i;
> >
> > -	gicd_typer = readl((uintptr_t)(priv.gicd_base + GICD_TYPER));
> > +	gicd_typer = readl((uintptr_t)(GICD_BASE + GICD_TYPER));
> 
> Where is GICD_BASE coming from? Are you now assuming that all platforms
> will define this constant instead of retrieving it from... or wait: the device
> tree?

Yes, I once thought getting from device tree, but as mentioned above for simplicity used GICD_BASE/GICR_BASE.
The GICD_BASE/GICR_BASE must be defined if they defined CONFIG_GICV3, otherwise the armv8/start.S will failed to build.

> 
> >  	/* GIC support for Locality specific peripheral interrupts (LPI's) */
> >  	if (!(gicd_typer & GICD_TYPER_LPIS)) {
> >  		pr_err("GIC implementation does not support LPI's\n");
> >  		return -EINVAL;
> >  	}
> >
> > +	/* lpi_id_bits to get LPI_PENDBASE_SZ and LPI_PROPBASE_SZ */
> > +	lpi_id_bits = min_t(u32, GICD_TYPER_ID_BITS(gicd_typer),
> > +			    ITS_MAX_LPI_NRBITS);
> > +
> > +	while (1) {
> > +		cpu_node = fdt_node_offset_by_prop_value(blob, cpu_node,
> > +							 "device_type",
> > +							 "cpu", 4);
> > +		if (cpu_node == -FDT_ERR_NOTFOUND)
> > +			break;
> > +
> > +		num_redist++;
> > +	}
> 
> NAK. The device tree binding tells you how many redistributors you have.
> Counting CPUs is wrong. Please use the binding as specified (and please
> account for the difference is size between GICv3 and GICv4).

Do you mean calculate the number of redistributors base on the properties 'redistributor-regions' and 'redistributor-stride'?
But in P26 of 'IHI0069G_gic_architecture_specification.pdf', it notes 'There is one copy of each of the Redistributor registers per PE', does it mean there are the same numbers of redistributors as the CPUs?

> 
> > +
> >  	/*
> >  	 * Check for LPI is disabled for all the redistributors.
> >  	 * Once the LPI table is enabled, can not program the @@ -95,48
> > +68,58 @@ int gic_lpi_tables_init(u64 base, u32 num_redist)
> >  	for (i = 0; i < num_redist; i++) {
> >  		u32 offset = i * GIC_REDISTRIBUTOR_OFFSET;
> >
> > -		if ((readl((uintptr_t)(priv.gicr_base + offset))) &
> > +		if ((readl((uintptr_t)(GICR_BASE + offset))) &
> 
> Oh, because you actually believe there is a *SINGLE* redistributor range?
> Time to face reality: you can have an *infinite* number of them. You need to
> handle ranges of redistributors, and each redistributor inside the range. 

This patch didn't change the property/pending base initialization logic, seems it only works on single redistributor range case, it can be improved. But I doesn't find the illustration on the redistributor range in gic architecture spec, can you help point out the section?
And what do you mean of *infinite* number of redistributors, my understanding is each PE needs one redistributor connecting to its cpu interface, can you help understand what is the extra redistributors for?


> 
> >  		    GICR_CTLR_ENABLE_LPIS) {
> > -			pr_err("Re-Distributor %d LPI is already enabled\n",
> > -			       i);
> > +			pr_err("Re-Distributor %d LPI is already enabled\n", i);
> >  			return -EINVAL;
> >  		}
> >  	}
> >
> > -	/* lpi_id_bits to get LPI_PENDBASE_SZ and LPi_PROPBASE_SZ */
> > -	lpi_id_bits = min_t(u32, GICD_TYPER_ID_BITS(gicd_typer),
> > -			    ITS_MAX_LPI_NRBITS);
> > +	pend_tab_total_sz = num_redist * LPI_PENDBASE_SZ;
> > +
> > +	base = memalign(SZ_64K, LPI_PROPBASE_SZ + pend_tab_total_sz);
> > +	if (!base)
> > +		return -ENOMEM;
> > +
> > +	lpi_tables.start = (phys_addr_t)base;
> > +	lpi_tables.end = (phys_addr_t)base + LPI_PROPBASE_SZ +
> > +			 pend_tab_total_sz - 1;
> > +	err = fdtdec_add_reserved_memory(blob, "gic-lpi-tables", &lpi_tables,
> > +					 NULL, 0, NULL, 0);
> > +	if (err) {
> > +		free(base);
> > +		return err;
> > +	}
> >
> >  	/* Set PropBase */
> > -	val = (base |
> > +	val = (((phys_addr_t)base + pend_tab_total_sz) |
> >  	       GICR_PROPBASER_INNERSHAREABLE |
> >  	       GICR_PROPBASER_RAWAWB |
> >  	       ((LPI_NRBITS - 1) & GICR_PROPBASER_IDBITS_MASK));
> >
> > -	writeq(val, (uintptr_t)(priv.gicr_base + GICR_PROPBASER));
> > -	tmp = readl((uintptr_t)(priv.gicr_base + GICR_PROPBASER));
> > +	writeq(val, (uintptr_t)(GICR_BASE + GICR_PROPBASER));
> > +	tmp = readl((uintptr_t)(GICR_BASE + GICR_PROPBASER));
> 
> This is also broken. The register exist *on each redistributor*. The fact that
> some implementations allow you to only write it once in IMPDEF, and you
> can't rely on it. Think of a multi-socket system. How do you expect this to
> work?
 
Agree that the current code rely on IMPDEF and it can be improved.
For the multi-socket system, it is out of my imagination, can you help to explain how GICv3 implementation works on this kind of system? Thanks!

Thanks,
Zhiqiang

> 
> >  	if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) {
> >  		if (!(tmp & GICR_PROPBASER_SHAREABILITY_MASK)) {
> >  			val &= ~(GICR_PROPBASER_SHAREABILITY_MASK |
> >  				GICR_PROPBASER_CACHEABILITY_MASK);
> >  			val |= GICR_PROPBASER_NC;
> > -			writeq(val,
> > -			       (uintptr_t)(priv.gicr_base + GICR_PROPBASER));
> > +			writeq(val, (uintptr_t)(GICR_BASE + GICR_PROPBASER));
> >  		}
> >  	}
> >
> > -	redist_lpi_base = base + LPI_PROPBASE_SZ;
> > -	pend_tab_va = map_physmem(redist_lpi_base, pend_tab_total_sz,
> > +	pend_table_base = (phys_addr_t)base;
> > +	pend_tab_va = map_physmem(pend_table_base, pend_tab_total_sz,
> >  				  MAP_NOCACHE);
> >  	memset(pend_tab_va, 0, pend_tab_total_sz);
> >  	flush_cache((ulong)pend_tab_va, pend_tab_total_sz);
> >  	unmap_physmem(pend_tab_va, MAP_NOCACHE);
> >
> > -	pend_base = priv.gicr_base + GICR_PENDBASER;
> > +	pend_base = GICR_BASE + GICR_PENDBASER;
> >  	for (i = 0; i < num_redist; i++) {
> >  		u32 offset = i * GIC_REDISTRIBUTOR_OFFSET;
> >
> > -		val = ((redist_lpi_base + (i * LPI_PENDBASE_SZ)) |
> > +		val = ((pend_table_base + (i * LPI_PENDBASE_SZ)) |
> >  			GICR_PENDBASER_INNERSHAREABLE |
> >  			GICR_PENDBASER_RAWAWB |
> >  			GICR_PENDBASER_PTZ);
> > @@ -152,19 +135,8 @@ int gic_lpi_tables_init(u64 base, u32 num_redist)
> >
> >  		/* Enable LPI for the redistributor */
> >  		writel(GICR_CTLR_ENABLE_LPIS,
> > -		       (uintptr_t)(priv.gicr_base + offset));
> > +		       (uintptr_t)(GICR_BASE + offset));
> >  	}
> 
> All of this is equally broken for the reasons mentioned above.
> 
> >
> >  	return 0;
> >  }
> > -
> > -static const struct udevice_id gic_v3_its_ids[] = {
> > -	{ .compatible = "arm,gic-v3" },
> > -	{}
> > -};
> > -
> > -U_BOOT_DRIVER(arm_gic_v3_its) = {
> > -	.name		= "gic-v3",
> > -	.id		= UCLASS_IRQ,
> > -	.of_match	= gic_v3_its_ids,
> > -};
> 
> And this smells like an awful regression.
> 
> 	M.
> 
> --
> Without deviation from the norm, progress is not possible.


More information about the U-Boot mailing list