[U-Boot] [PATCH] Davinci: SPI performance enhancements
Paulraj, Sandeep
s-paulraj at ti.com
Mon Jun 21 16:41:13 CEST 2010
>
> The following restructuring and optimisations increase the SPI
> read performance from 1.3MiB/s (on da850) to 2.87MiB/s (on da830):
>
> Remove continual revaluation of driver state from the core of the
> copy loop. State can not change during the copy loop, so it is
> possible to move these evaluations to before the copy loop.
>
> Cost is more code space as loop variants are required for each set
> of possible configurations. The loops are simpler however, so the
> extra is only 128bytes on da830 with CONFIG_SPI_HALF_DUPLEX
> defined.
>
> Unrolling the first copy loop iteration allows the TX buffer to be
> pre-loaded reducing SPI clock starvation.
>
> Unrolling the last copy loop iteration removes testing for the
> final loop iteration every time round the loop.
>
> Using the RX buffer empty flag as a transfer throttle allows the
> assumption that it is always safe to write to the TX buffer, so
> polling of TX buffer full flag can be removed.
>
> Signed-off-by: Nick Thompson <nick.thompson at ge.com>
> ---
> da850 and da830 are similar devices. The SPI module is common to
> both, but da850 uses DDR and da830 uses SDRAM. The EVM's might
> not actually be comparable, but they appear to be at least similar.
>
> The speed was tested with a 8MiB transfer from SPI FLASH using:
>
> sf read 0xc0008000 0 0x800000
>
> drivers/spi/davinci_spi.c | 195 +++++++++++++++++++++++++++++-----------
This patch does not apply against Wolfgang's next.
The patch should be against u-boot/next.
> ----
> 1 files changed, 128 insertions(+), 67 deletions(-)
>
> diff --git a/drivers/spi/davinci_spi.c b/drivers/spi/davinci_spi.c
> index 08f837b..4518ecb 100644
> --- a/drivers/spi/davinci_spi.c
> +++ b/drivers/spi/davinci_spi.c
> @@ -66,7 +66,7 @@ void spi_free_slave(struct spi_slave *slave)
> int spi_claim_bus(struct spi_slave *slave)
> {
> struct davinci_spi_slave *ds = to_davinci_spi(slave);
> - unsigned int scalar, data1_reg_val = 0;
> + unsigned int scalar;
>
> /* Enable the SPI hardware */
> writel(SPIGCR0_SPIRST_MASK, &ds->regs->gcr0);
> @@ -93,11 +93,6 @@ int spi_claim_bus(struct spi_slave *slave)
> writel(8 | (scalar << SPIFMT_PRESCALE_SHIFT) |
> (1 << SPIFMT_PHASE_SHIFT), &ds->regs->fmt0);
>
> - /* hold cs active at end of transfer until explicitly de-asserted */
> - data1_reg_val = (1 << SPIDAT1_CSHOLD_SHIFT) |
> - (slave->cs << SPIDAT1_CSNR_SHIFT);
> - writel(data1_reg_val, &ds->regs->dat1);
> -
> /*
> * Including a minor delay. No science here. Should be good even
> with
> * no delay
> @@ -113,8 +108,7 @@ int spi_claim_bus(struct spi_slave *slave)
> writel(0, &ds->regs->lvl);
>
> /* enable SPI */
> - writel((readl(&ds->regs->gcr1) |
> - SPIGCR1_SPIENA_MASK), &ds->regs->gcr1);
> + writel((readl(&ds->regs->gcr1) | SPIGCR1_SPIENA_MASK), &ds->regs-
> >gcr1);
>
> return 0;
> }
> @@ -127,14 +121,125 @@ void spi_release_bus(struct spi_slave *slave)
> writel(SPIGCR0_SPIRST_MASK, &ds->regs->gcr0);
> }
>
> +/*
> + * This functions needs to act like a macro to avoid pipeline reloads in
> the
> + * loops below. Use always_inline. This gains us about 160KiB/s and the
> bloat
> + * appears to be zero bytes (da830).
> + */
> +__attribute__((always_inline))
> +static inline u32 davinci_spi_xfer_data(struct davinci_spi_slave *ds, u32
> data)
> +{
> + u32 buf_reg_val;
> +
> + /* send out data */
> + writel(data, &ds->regs->dat1);
> +
> + /* wait for the data to clock in/out */
> + while ((buf_reg_val = readl(&ds->regs->buf)) & SPIBUF_RXEMPTY_MASK)
> + ;
> +
> + return buf_reg_val;
> +}
> +
> +static int davinci_spi_read(struct spi_slave *slave, unsigned int len,
> + u8 *rxp, unsigned long flags)
> +{
> + struct davinci_spi_slave *ds = to_davinci_spi(slave);
> + unsigned int data1_reg_val;
> +
> + /* enable CS hold, CS[n] and clear the data bits */
> + data1_reg_val = ((1 << SPIDAT1_CSHOLD_SHIFT) |
> + (slave->cs << SPIDAT1_CSNR_SHIFT));
> +
> + /* wait till TXFULL is deasserted */
> + while (readl(&ds->regs->buf) & SPIBUF_TXFULL_MASK)
> + ;
> +
> + /* preload the TX buffer to avoid clock starvation */
> + writel(data1_reg_val, &ds->regs->dat1);
> +
> + /* keep reading 1 byte until only 1 byte left */
> + while ((len--) > 1)
> + *rxp++ = davinci_spi_xfer_data(ds, data1_reg_val);
> +
> + /* clear CS hold when we reach the end */
> + if (flags & SPI_XFER_END)
> + data1_reg_val &= ~(1 << SPIDAT1_CSHOLD_SHIFT);
> +
> + /* read the last byte */
> + *rxp = davinci_spi_xfer_data(ds, data1_reg_val);
> +
> + return 0;
> +}
> +
> +static int davinci_spi_write(struct spi_slave *slave, unsigned int len,
> + const u8 *txp, unsigned long flags)
> +{
> + struct davinci_spi_slave *ds = to_davinci_spi(slave);
> + unsigned int data1_reg_val;
> +
> + /* enable CS hold and clear the data bits */
> + data1_reg_val = ((1 << SPIDAT1_CSHOLD_SHIFT) |
> + (slave->cs << SPIDAT1_CSNR_SHIFT));
> +
> + /* wait till TXFULL is deasserted */
> + while (readl(&ds->regs->buf) & SPIBUF_TXFULL_MASK)
> + ;
> +
> + /* preload the TX buffer to avoid clock starvation */
> + if (len > 2) {
> + writel(data1_reg_val | *txp++, &ds->regs->dat1);
> + len--;
> + }
> +
> + /* keep writing 1 byte until only 1 byte left */
> + while ((len--) > 1)
> + davinci_spi_xfer_data(ds, data1_reg_val | *txp++);
> +
> + /* clear CS hold when we reach the end */
> + if (flags & SPI_XFER_END)
> + data1_reg_val &= ~(1 << SPIDAT1_CSHOLD_SHIFT);
> +
> + /* write the last byte */
> + davinci_spi_xfer_data(ds, data1_reg_val | *txp);
> +
> + return 0;
> +}
> +
> +#ifndef CONFIG_SPI_HALF_DUPLEX
> +static int davinci_spi_read_write(struct spi_slave *slave, unsigned int
> len,
> + u8 *rxp, const u8 *txp, unsigned long flags)
> +{
> + struct davinci_spi_slave *ds = to_davinci_spi(slave);
> + unsigned int data1_reg_val;
> +
> + /* enable CS hold and clear the data bits */
> + data1_reg_val = ((1 << SPIDAT1_CSHOLD_SHIFT) |
> + (slave->cs << SPIDAT1_CSNR_SHIFT));
> +
> + /* wait till TXFULL is deasserted */
> + while (readl(&ds->regs->buf) & SPIBUF_TXFULL_MASK)
> + ;
> +
> + /* keep reading and writing 1 byte until only 1 byte left */
> + while ((len--) > 1)
> + *rxp++ = davinci_spi_xfer_data(ds, data1_reg_val | *txp++);
> +
> + /* clear CS hold when we reach the end */
> + if (flags & SPI_XFER_END)
> + data1_reg_val &= ~(1 << SPIDAT1_CSHOLD_SHIFT);
> +
> + /* read and write the last byte */
> + *rxp = davinci_spi_xfer_data(ds, data1_reg_val | *txp);
> +
> + return 0;
> +}
> +#endif
> +
> int spi_xfer(struct spi_slave *slave, unsigned int bitlen,
> const void *dout, void *din, unsigned long flags)
> {
> - struct davinci_spi_slave *ds = to_davinci_spi(slave);
> - unsigned int len, data1_reg_val = readl(&ds->regs->dat1);
> - unsigned int i_cnt = 0, o_cnt = 0, buf_reg_val;
> - const u8 *txp = dout; /* dout can be NULL for read operation */
> - u8 *rxp = din; /* din can be NULL for write operation */
> + unsigned int len;
>
> if (bitlen == 0)
> /* Finish any previously submitted transfers */
> @@ -154,63 +259,19 @@ int spi_xfer(struct spi_slave *slave, unsigned int
> bitlen,
>
> len = bitlen / 8;
>
> - /* do an empty read to clear the current contents */
> - readl(&ds->regs->buf);
> -
> - /* keep writing and reading 1 byte until done */
> - while ((i_cnt < len) || (o_cnt < len)) {
> - /* read RX buffer and flags */
> - buf_reg_val = readl(&ds->regs->buf);
> -
> - /* if data is available */
> - if ((i_cnt < len) &&
> - (buf_reg_val & SPIBUF_RXEMPTY_MASK) == 0) {
> - /*
> - * If there is no read buffer simply
> - * ignore the read character
> - */
> - if (rxp)
> - *rxp++ = buf_reg_val & 0xFF;
> - /* increment read words count */
> - i_cnt++;
> - }
> -
> - /*
> - * if the tx buffer is empty and there
> - * is still data to transmit
> - */
> - if ((o_cnt < len) &&
> - ((buf_reg_val & SPIBUF_TXFULL_MASK) == 0)) {
> - /* write the data */
> - data1_reg_val &= ~0xFFFF;
> - if (txp)
> - data1_reg_val |= *txp++;
> - /*
> - * Write to DAT1 is required to keep
> - * the serial transfer going.
> - * We just terminate when we reach the end.
> - */
> - if ((o_cnt == (len - 1)) && (flags & SPI_XFER_END)) {
> - /* clear CS hold */
> - writel(data1_reg_val &
> - ~(1 << SPIDAT1_CSHOLD_SHIFT),
> - &ds->regs->dat1);
> - } else {
> - /* enable CS hold and write TX register */
> - data1_reg_val |= ((1 << SPIDAT1_CSHOLD_SHIFT) |
> - (slave->cs << SPIDAT1_CSNR_SHIFT));
> - writel(data1_reg_val, &ds->regs->dat1);
> - }
> - /* increment written words count */
> - o_cnt++;
> - }
> - }
> - return 0;
> + if (!dout)
> + return davinci_spi_read(slave, len, din, flags);
> + else if (!din)
> + return davinci_spi_write(slave, len, dout, flags);
> +#ifndef CONFIG_SPI_HALF_DUPLEX
> + else
> + return davinci_spi_read_write(slave, len, din, dout, flags);
> +#endif
>
> out:
> if (flags & SPI_XFER_END) {
> - writel(data1_reg_val &
> - ~(1 << SPIDAT1_CSHOLD_SHIFT), &ds->regs->dat1);
> + u8 dummy = 0;
> + davinci_spi_write(slave, 1, &dummy, flags);
> }
> return 0;
> }
> --
> 1.7.0.4
More information about the U-Boot
mailing list