[U-Boot] [PATCH 1/2] mips: spi: mscc: Add fast bitbang SPI driver

Lars.Povlsen at microchip.com Lars.Povlsen at microchip.com
Mon Jan 7 10:19:07 UTC 2019


Hi Daniel!

Thank you for your comments. I hope we are not carpet-bombing you
with patches :-). Your efforts are much appreciated!

I added comments below.

> -----Original Message-----
> From: Daniel Schwierzeck <daniel.schwierzeck at gmail.com>
> Sent: Friday, January 4, 2019 19:53
> To: Lars Povlsen - M31675 <Lars.Povlsen at microchip.com>; u-
> boot at lists.denx.de
> Cc: gregory.clement at bootlin.com; Horatiu Vultur - M31836
> <Horatiu.Vultur at microchip.com>; Jagan Teki <jagan at openedev.com>
> Subject: Re: [PATCH 1/2] mips: spi: mscc: Add fast bitbang SPI driver
> 
> 
> 
> Am 04.01.19 um 10:47 schrieb Lars Povlsen:
> > From: Lars Povlsen <lars.povlsen at microsemi.com>
> >
> > This patch add a new SPI driver for MSCC SOCs that does not sport the
> > designware SPI hardware controller.
> >
> > Performance gain: 7.664 seconds vs. 17.633 for 1 Mbyte write -
> > measured on a Luton10 with a m25p128 NOR flash.
> 
> is the generic bitbang driver such inefficient (e.g. by having to much
> delays) or does the improvement cone from cutting out GPIO API and
> drivers? IMHO it's not the best idea to workaround generic drivers by
> creating custom ones and repeating parts of the code.

Surely some overhead is introduced by the GPIO API. In this case the overhead
is made worse by an "emulation" on top of the normal GPIO drivers on the SoC
due to the fact that CS0 is not GPIO controlled but a dedicated pin controlled
by SPI directly. This driver is more "direct" and offer a significant speed
increase.

As for duplication, the inner loop of the driver is admittedly functionally
equivalent to the innards of soft_spi_xfer(), but not a duplicate. And it will
obsolete gpio-mscc-bitbang-spi.c (which I will remove in the next patch version).

> 
> >
> > Signed-off-by: Lars Povlsen <lars.povlsen at microsemi.com>
> > ---
> >  MAINTAINERS                  |   1 +
> >  configs/mscc_luton_defconfig |   3 +-
> >  drivers/spi/Kconfig          |   7 +
> >  drivers/spi/Makefile         |   1 +
> >  drivers/spi/mscc_bb_spi.c    | 258
> +++++++++++++++++++++++++++++++++++
> >  5 files changed, 269 insertions(+), 1 deletion(-)
> >  create mode 100644 drivers/spi/mscc_bb_spi.c
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 494962e9b3..0cee99ef56 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -524,6 +524,7 @@ F:	arch/mips/dts/ocelot*
> >  F:	board/mscc/
> >  F:	configs/mscc*
> >  F:	drivers/gpio/mscc_sgpio.c
> > +F:	drivers/spi/mscc_bb_spi.c
> >  F:	include/configs/vcoreiii.h
> >
> >  MIPS JZ4780
> > diff --git a/configs/mscc_luton_defconfig
> b/configs/mscc_luton_defconfig
> > index 65f0672c1e..d48a93a2a5 100644
> > --- a/configs/mscc_luton_defconfig
> > +++ b/configs/mscc_luton_defconfig
> > @@ -34,6 +34,7 @@ CONFIG_CMD_DHCP=y
> >  # CONFIG_NET_TFTP_VARS is not set
> >  # CONFIG_CMD_NFS is not set
> >  CONFIG_CMD_PING=y
> > +CONFIG_CMD_TIME=y
> >  CONFIG_CMD_MTDPARTS=y
> >  CONFIG_MTDIDS_DEFAULT="nor0=spi_flash"
> >
> CONFIG_MTDPARTS_DEFAULT="mtdparts=spi_flash:1m(UBoot),256k(Env),256k(Env
> .bk),512k(Unused),6m(linux)"
> > @@ -66,5 +67,5 @@ CONFIG_DEBUG_UART_SHIFT=2
> >  CONFIG_SYS_NS16550=y
> >  CONFIG_SPI=y
> >  CONFIG_DM_SPI=y
> > -CONFIG_SOFT_SPI=y
> > +CONFIG_MSCC_BB_SPI=y
> >  CONFIG_LZMA=y
> 
> the defconfig change should go into patch 1/2 along with the device-tree
> update

I will do so. (I was earlier advised to keep DT changes in separate patches).

> 
> > diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
> > index a7bb5b35c2..de4d62dd8f 100644
> > --- a/drivers/spi/Kconfig
> > +++ b/drivers/spi/Kconfig
> > @@ -294,6 +294,13 @@ config SOFT_SPI
> >  	 Enable Soft SPI driver. This driver is to use GPIO simulate
> >  	 the SPI protocol.
> >
> > +config MSCC_BB_SPI
> > +	bool "MSCC bitbang SPI driver"
> > +	depends on SOC_VCOREIII
> > +	help
> > +	  Enable MSCC bitbang SPI driver. This driver can be used on
> > +	  MSCC SOCs.
> > +
> >  config CF_SPI
> >  	bool "ColdFire SPI driver"
> >  	help
> > diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
> > index 392a925795..4acec3ea17 100644
> > --- a/drivers/spi/Makefile
> > +++ b/drivers/spi/Makefile
> > @@ -36,6 +36,7 @@ obj-$(CONFIG_MPC8XX_SPI) += mpc8xx_spi.o
> >  obj-$(CONFIG_MPC8XXX_SPI) += mpc8xxx_spi.o
> >  obj-$(CONFIG_MTK_QSPI) += mtk_qspi.o
> >  obj-$(CONFIG_MT7621_SPI) += mt7621_spi.o
> > +obj-$(CONFIG_MSCC_BB_SPI) += mscc_bb_spi.o
> >  obj-$(CONFIG_MVEBU_A3700_SPI) += mvebu_a3700_spi.o
> >  obj-$(CONFIG_MXC_SPI) += mxc_spi.o
> >  obj-$(CONFIG_MXS_SPI) += mxs_spi.o
> > diff --git a/drivers/spi/mscc_bb_spi.c b/drivers/spi/mscc_bb_spi.c
> > new file mode 100644
> > index 0000000000..e0ed2c9725
> > --- /dev/null
> > +++ b/drivers/spi/mscc_bb_spi.c
> > @@ -0,0 +1,258 @@
> > +// SPDX-License-Identifier: (GPL-2.0 OR MIT)
> > +/*
> > + * Microsemi SoCs spi driver
> > + *
> > + * License: Dual MIT/GPL
> 
> line is redundant due to SPDX

Removed.

> 
> > + * Copyright (c) 2018 Microsemi Corporation
> > + */
> > +
> > +#include <common.h>
> > +#include <dm.h>
> > +#include <errno.h>
> > +#include <malloc.h>
> > +#include <spi.h>
> > +#include <dm.h>
> > +#include <asm/gpio.h>
> > +#include <asm/io.h>
> > +#include <linux/delay.h>
> > +
> > +DECLARE_GLOBAL_DATA_PTR;
> 
> you shouldn't need that anymore
> 
> > +
> > +struct mscc_bb_platdata {
> > +	void __iomem *regs;
> > +	u32 deactivate_delay_us;
> > +};
> > +
> > +struct mscc_bb_priv {
> > +	void __iomem *regs;
> > +	bool cs_active;   /* State flag as to whether CS is asserted */
> > +	int cs_num;
> > +	u32 svalue;			/* Value to start transfer with */
> > +	u32 clk1;			/* Clock value start */
> > +	u32 clk2;			/* Clock value 2nd phase */
> > +};
> > +
> > +#define DELAY() \
> > +	asm volatile("nop; nop; nop; nop; nop; nop; nop; nop;" \
> > +		     "nop; nop; nop; nop; nop; nop; nop; nop;" \
> > +		     "nop; nop; nop; nop; nop; nop; nop; nop;")
> 
> driver code shouldn't have such low-level fragments. Since you already
> have something like this in your memory setup code, can't you generalize
> this and export this as SoC specific macro/inline function? Or would
> ndelay() work?

The delay is specifically crafted for the SPI driver, and as such belong here.
It is used to ensure the signal hold times and a well-shaped SPI waveform.
The use of NOP's, although seemingly crude, ensure a minimal delay without
affecting the cache.

Earlier work tried to use ndelay, but this affected both the actual incurred
wait time and cache state for an adverse effect on SPI waveforms and performance.

I will try to generalize the implementation to be more generic and widely usable.

> 
> > +
> > +static int mscc_bb_spi_cs_activate(struct mscc_bb_priv *priv, int
> mode, int cs)
> > +{
> > +	if (!priv->cs_active) {
> > +		int cpha = mode & SPI_CPHA;
> > +		u32 cs_value;
> > +
> > +		priv->cs_num = cs;
> > +
> > +		if (cpha) {
> > +			/* Initial clock starts SCK=1 */
> > +			priv->clk1 = ICPU_SW_MODE_SW_SPI_SCK;
> > +			priv->clk2 = 0;
> > +		} else {
> > +			/* Initial clock starts SCK=0 */
> > +			priv->clk1 = 0;
> > +			priv->clk2 = ICPU_SW_MODE_SW_SPI_SCK;
> > +		}
> > +
> > +		/* Enable bitbang, SCK_OE, SDO_OE */
> > +		priv->svalue = (ICPU_SW_MODE_SW_PIN_CTRL_MODE | /* Bitbang */
> > +				ICPU_SW_MODE_SW_SPI_SCK_OE    | /* SCK_OE */
> > +				ICPU_SW_MODE_SW_SPI_SDO_OE);   /* SDO OE */
> > +
> > +		/* Add CS */
> > +		if (cs >= 0) {
> > +			cs_value =
> > +				ICPU_SW_MODE_SW_SPI_CS_OE(BIT(cs)) |
> > +				ICPU_SW_MODE_SW_SPI_CS(BIT(cs));
> > +		} else {
> > +			cs_value = 0;
> > +		}
> > +
> > +		priv->svalue |= cs_value;
> > +
> > +		/* Enable the CS in HW, Initial clock value */
> > +		writel(priv->svalue | priv->clk2, priv->regs);
> > +
> > +		priv->cs_active = true;
> > +		debug("Activated CS%d\n", priv->cs_num);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int mscc_bb_spi_cs_deactivate(struct mscc_bb_priv *priv, int
> deact_delay)
> > +{
> > +	if (priv->cs_active) {
> > +		/* Keep driving the CLK to its current value while
> > +		 * actively deselecting CS.
> > +		 */
> > +		u32 value = readl(priv->regs);
> > +
> > +		value &= ~ICPU_SW_MODE_SW_SPI_CS_M;
> > +		writel(value, priv->regs);
> > +		DELAY();
> > +
> > +		/* Stop driving the clock, but keep CS with nCS == 1 */
> > +		value &= ~ICPU_SW_MODE_SW_SPI_SCK_OE;
> > +		writel(value, priv->regs);
> > +
> > +		/* Deselect hold time delay */
> > +		if (deact_delay)
> > +			udelay(deact_delay);
> > +
> > +		/* Drop everything */
> > +		writel(0, priv->regs);
> > +
> > +		priv->cs_active = false;
> > +		debug("Deactivated CS%d\n", priv->cs_num);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +int mscc_bb_spi_claim_bus(struct udevice *dev)
> > +{
> > +	return 0;
> > +}
> > +
> > +int mscc_bb_spi_release_bus(struct udevice *dev)
> > +{
> > +	return 0;
> > +}
> > +
> > +int mscc_bb_spi_xfer(struct udevice *dev, unsigned int bitlen,
> > +		     const void *dout, void *din, unsigned long flags)
> > +{
> > +	struct udevice *bus = dev_get_parent(dev);
> > +	struct dm_spi_slave_platdata *plat = dev_get_parent_platdata(dev);
> > +	struct mscc_bb_platdata *bus_plat = dev_get_platdata(bus);
> > +	struct mscc_bb_priv *priv = dev_get_priv(bus);
> > +	u32             i, count;
> > +	const u8	*txd = dout;
> > +	u8		*rxd = din;
> > +
> > +	debug("spi_xfer: slave %s:%s cs%d mode %d, dout %p din %p bitlen
> %u\n",
> > +	      dev->parent->name, dev->name, plat->cs,  plat->mode, dout,
> > +	      din, bitlen);
> > +
> > +	if (flags & SPI_XFER_BEGIN)
> > +		mscc_bb_spi_cs_activate(priv, plat->mode, plat->cs);
> > +
> > +	count = bitlen / 8;
> > +	for (i = 0; i < count; i++) {
> > +		u32 rx = 0, mask = 0x80, value;
> > +
> > +		while (mask) {
> > +			/* Initial condition: CLK is low. */
> > +			value = priv->svalue;
> > +			if (txd && txd[i] & mask)
> > +				value |= ICPU_SW_MODE_SW_SPI_SDO;
> > +
> > +			/* Drive data while taking CLK low. The device
> > +			 * we're accessing will sample on the
> > +			 * following rising edge and will output data
> > +			 * on this edge for us to be sampled at the
> > +			 * end of this loop.
> > +			 */
> > +			writel(value | priv->clk1, priv->regs);
> > +
> > +			/* Wait for t_setup. All devices do have a
> > +			 * setup-time, so we always insert some delay
> > +			 * here. Some devices have a very long
> > +			 * setup-time, which can be adjusted by the
> > +			 * user through vcoreiii_device->delay.
> > +			 */
> > +			DELAY();
> > +
> > +			/* Drive the clock high. */
> > +			writel(value | priv->clk2, priv->regs);
> > +
> > +			/* Wait for t_hold. See comment about t_setup
> > +			 * above.
> > +			 */
> > +			DELAY();
> > +
> > +			/* We sample as close to the next falling edge
> > +			 * as possible.
> > +			 */
> > +			value = readl(priv->regs);
> > +			if (value & ICPU_SW_MODE_SW_SPI_SDI)
> > +				rx |= mask;
> > +			mask >>= 1;
> > +		}
> > +		if (rxd) {
> > +			debug("Read 0x%02x\n", rx);
> > +			rxd[i] = (u8)rx;
> > +		}
> > +		debug("spi_xfer: byte %d/%d\n", i + 1, count);
> > +	}
> > +
> > +	debug("spi_xfer: done\n");
> > +
> > +	if (flags & SPI_XFER_END)
> > +		mscc_bb_spi_cs_deactivate(priv, bus_plat-
> >deactivate_delay_us);
> > +
> > +	return 0;
> > +}
> > +
> > +int mscc_bb_spi_set_speed(struct udevice *dev, unsigned int speed)
> > +{
> > +	/* Accept any speed */
> > +	return 0;
> > +}
> > +
> > +int mscc_bb_spi_set_mode(struct udevice *dev, unsigned int mode)
> > +{
> > +	return 0;
> > +}
> > +
> > +static const struct dm_spi_ops mscc_bb_ops = {
> > +	.claim_bus	= mscc_bb_spi_claim_bus,
> > +	.release_bus	= mscc_bb_spi_release_bus,
> > +	.xfer		= mscc_bb_spi_xfer,
> > +	.set_speed	= mscc_bb_spi_set_speed,
> > +	.set_mode	= mscc_bb_spi_set_mode,
> > +};
> > +
> > +static const struct udevice_id mscc_bb_ids[] = {
> > +	{ .compatible = "mscc,luton-bb-spi" },
> > +	{ }
> > +};
> > +
> > +static int mscc_bb_spi_probe(struct udevice *bus)
> > +{
> > +	struct mscc_bb_priv *priv = dev_get_priv(bus);
> > +	struct mscc_bb_platdata *plat = dev_get_platdata(bus);
> > +
> > +	debug("%s: loaded, priv %p, plat %p\n", __func__, priv, plat);
> > +
> > +	/* Initialize */
> > +	priv->regs = plat->regs;
> > +	priv->cs_active = false;
> > +
> > +	return 0;
> > +}
> > +
> > +static int mscc_bb_spi_ofdata_to_platdata(struct udevice *bus)
> > +{
> > +	struct mscc_bb_platdata *plat = dev_get_platdata(bus);
> > +
> > +	plat->regs = (void __iomem *)dev_read_addr(bus);
> > +
> > +	plat->deactivate_delay_us =
> > +		dev_read_u32_default(bus, "spi-deactivate-delay", 0);
> > +
> > +	return 0;
> > +}
> > +
> > +U_BOOT_DRIVER(mscc_bb) = {
> > +	.name	= "mscc_bb",
> > +	.id	= UCLASS_SPI,
> > +	.of_match = mscc_bb_ids,
> > +	.ops	= &mscc_bb_ops,
> > +	.ofdata_to_platdata = mscc_bb_spi_ofdata_to_platdata,
> > +	.platdata_auto_alloc_size = sizeof(struct mscc_bb_platdata),
> > +	.priv_auto_alloc_size = sizeof(struct mscc_bb_priv),
> > +	.probe	= mscc_bb_spi_probe,
> > +};
> >
> 
> --
> - Daniel


More information about the U-Boot mailing list