[PATCH v2 06/10] spi: dw: Rewrite poll_transfer logic
Damien Le Moal
Damien.LeMoal at wdc.com
Mon Apr 5 02:47:25 CEST 2021
On 2021/04/03 8:05, Sean Anderson wrote:
> This rewrites poll_transfer, dw_writer, and dw_reader.
>
> * We now use RO transfers (instead of always using TR). This eliminates the
> need to send out dummy words, and simplifies the transmit logic.
> * All parameters (except regs and bits_per_word) are passed explicitly.
> * Most parameters have been made explicit (instead of being recalculated on
> every loop).
> * Transfers are measured in units of frames instead of bytes. This matches
> the measurements used by the device and eliminates several divisions by
> bits_per_word.
> * We now check if we have over-/under-flowed the FIFO. This should help
> prevent hangs when the device stops the transfer and U-Boot doesn't
> realize it (and then waits for the remaining data forever). TXUI is not
> present in DW_APB_SSI, but in the worst case we just don't write data.
>
> Unfortunately, this doesn't seem to solve all problems, and there are
> some remaining bugs (such as some transfers containing all 1s or all 0s)
> when we have many fifo overflows. This is solved for DWC devices by
> enabling clock stretching.
> * poll_transfer is now used by dw_spi_exec_op as well as xfer.
> * There are separate inner loops for 8-, 16-, and 32-bit frame sizes. This
> should hopefully reduce the amount of over-/under-flows. However, I
> haven't done exhaustive testing to ensure this is really necessary. In
> particular, I was never able to prevent read overflows at 50MHz clock
> speed.
That sounds like the long fight I had on Linux side... Did you account for the
fact that the K210 has a buggy fifo depth (it reports 32 but is in fact 31) ?
>
> These changes should probably have been split up into several commits, but
> several depend on each other, and it would be difficult to break this up
> while preserving bisectability.
>
> Signed-off-by: Sean Anderson <seanga2 at gmail.com>
> ---
>
> (no changes since v1)
>
> drivers/spi/designware_spi.c | 284 +++++++++++++++++++++--------------
> 1 file changed, 171 insertions(+), 113 deletions(-)
>
> diff --git a/drivers/spi/designware_spi.c b/drivers/spi/designware_spi.c
> index 6375e6d778..72cca14887 100644
> --- a/drivers/spi/designware_spi.c
> +++ b/drivers/spi/designware_spi.c
> @@ -134,14 +134,10 @@ struct dw_spi_priv {
> unsigned int freq; /* Default frequency */
> unsigned int mode;
>
> - const void *tx;
> - const void *tx_end;
> - void *rx;
> - void *rx_end;
> u32 fifo_len; /* depth of the FIFO buffer */
>
> int bits_per_word;
> - int len;
> + int frames; /* Number of frames in the transfer */
> u8 cs; /* chip select pin */
> u8 tmode; /* TR/TO/RO/EEPROM */
> u8 type; /* SPI/SSP/MicroWire */
> @@ -372,14 +368,24 @@ static int dw_spi_probe(struct udevice *bus)
> return 0;
> }
>
> -/* Return the max entries we can fill into tx fifo */
> -static inline u32 tx_max(struct dw_spi_priv *priv)
> +/**
> + * dw_writer() - Write data frames to the tx fifo
> + * @priv: Driver private info
> + * @tx: The tx buffer
> + * @idx: The number of data frames already transmitted
> + * @tx_frames: The number of data frames left to transmit
> + * @rx_frames: The number of data frames left to receive (0 if only
> + * transmitting)
> + * @frame_bytes: The number of bytes taken up by one data frame
> + *
> + * This function writes up to @tx_frames data frames using data from @tx[@idx].
> + *
> + * Return: The number of frames read
> + */
> +static uint dw_writer(struct dw_spi_priv *priv, const void *tx, uint idx,
> + uint tx_frames, uint rx_frames, uint frame_bytes)
> {
> - u32 tx_left, tx_room, rxtx_gap;
> -
> - tx_left = (priv->tx_end - priv->tx) / (priv->bits_per_word >> 3);
> - tx_room = priv->fifo_len - dw_read(priv, DW_SPI_TXFLR);
> -
> + u32 tx_room = priv->fifo_len - dw_read(priv, DW_SPI_TXFLR);
> /*
> * Another concern is about the tx/rx mismatch, we
> * thought about using (priv->fifo_len - rxflr - txflr) as
> @@ -388,67 +394,131 @@ static inline u32 tx_max(struct dw_spi_priv *priv)
> * shift registers. So a control from sw point of
> * view is taken.
> */
> - rxtx_gap = ((priv->rx_end - priv->rx) - (priv->tx_end - priv->tx)) /
> - (priv->bits_per_word >> 3);
> + u32 rxtx_gap = rx_frames - tx_frames;
> + u32 count = min3(tx_frames, tx_room, (u32)(priv->fifo_len - rxtx_gap));
> + u32 *dr = priv->regs + DW_SPI_DR;
>
> - return min3(tx_left, tx_room, (u32)(priv->fifo_len - rxtx_gap));
> -}
> + if (!count)
> + return 0;
>
> -/* Return the max entries we should read out of rx fifo */
> -static inline u32 rx_max(struct dw_spi_priv *priv)
> -{
> - u32 rx_left = (priv->rx_end - priv->rx) / (priv->bits_per_word >> 3);
> -
> - return min_t(u32, rx_left, dw_read(priv, DW_SPI_RXFLR));
> -}
> -
> -static void dw_writer(struct dw_spi_priv *priv)
> -{
> - u32 max = tx_max(priv);
> - u32 txw = 0xFFFFFFFF;
> -
> - while (max--) {
> - /* Set the tx word if the transfer's original "tx" is not null */
> - if (priv->tx_end - priv->len) {
> - if (priv->bits_per_word == 8)
> - txw = *(u8 *)(priv->tx);
> - else
> - txw = *(u16 *)(priv->tx);
> - }
> - dw_write(priv, DW_SPI_DR, txw);
> - log_content("tx=0x%02x\n", txw);
> - priv->tx += priv->bits_per_word >> 3;
> +#define do_write(type) do { \
> + type *start = ((type *)tx) + idx; \
> + type *end = start + count; \
> + do { \
> + __raw_writel(*start++, dr); \
> + } while (start < end); \
> +} while (0)
> + switch (frame_bytes) {
> + case 1:
> + do_write(u8);
> + break;
> + case 2:
> + do_write(u16);
> + break;
> + case 3:
> + case 4:
> + default:
> + do_write(u32);
> + break;
> }
> +#undef do_write
> +
> + return count;
> }
>
> -static void dw_reader(struct dw_spi_priv *priv)
> +/**
> + * dw_reader() - Read data frames from the rx fifo
> + * @priv: Driver private data
> + * @rx: The rx buffer
> + * @idx: The number of data frames already received
> + * @frames: The number of data frames left to receive
> + * @frame_bytes: The size of a data frame in bytes
> + *
> + * This function reads up to @frames data frames from @rx[@idx].
> + *
> + * Return: The number of frames read
> + */
> +static uint dw_reader(struct dw_spi_priv *priv, void *rx, uint idx, uint frames,
> + uint frame_bytes)
> {
> - u32 max = rx_max(priv);
> - u16 rxw;
> + u32 rx_lvl = dw_read(priv, DW_SPI_RXFLR);
> + u32 count = min(frames, rx_lvl);
> + u32 *dr = priv->regs + DW_SPI_DR;
>
> - while (max--) {
> - rxw = dw_read(priv, DW_SPI_DR);
> - log_content("rx=0x%02x\n", rxw);
> + if (!count)
> + return 0;
>
> - /* Care about rx if the transfer's original "rx" is not null */
> - if (priv->rx_end - priv->len) {
> - if (priv->bits_per_word == 8)
> - *(u8 *)(priv->rx) = rxw;
> - else
> - *(u16 *)(priv->rx) = rxw;
> - }
> - priv->rx += priv->bits_per_word >> 3;
> +#define do_read(type) do { \
> + type *start = ((type *)rx) + idx; \
> + type *end = start + count; \
> + do { \
> + *start++ = __raw_readl(dr); \
> + } while (start < end); \
> +} while (0)
> + switch (frame_bytes) {
> + case 1:
> + do_read(u8);
> + break;
> + case 2:
> + do_read(u16);
> + break;
> + case 3:
> + case 4:
> + default:
> + do_read(u32);
> + break;
> }
> +#undef do_read
> +
> + return count;
> }
>
> -static int poll_transfer(struct dw_spi_priv *priv)
> +/**
> + * poll_transfer() - Transmit and receive data frames
> + * @priv: Driver private data
> + * @tx: The tx buffer. May be %NULL to only receive.
> + * @rx: The rx buffer. May be %NULL to discard read data.
> + * @frames: The number of data frames to transfer
> + *
> + * Transmit @tx, while recieving @rx.
> + *
> + * Return: The lesser of the number of frames transmitted or received.
> + */
> +static uint poll_transfer(struct dw_spi_priv *priv, const void *tx, void *rx,
> + uint frames)
> {
> - do {
> - dw_writer(priv);
> - dw_reader(priv);
> - } while (priv->rx_end > priv->rx);
> + uint frame_bytes = priv->bits_per_word >> 3;
> + uint tx_idx = 0;
> + uint rx_idx = 0;
> + uint tx_frames = tx ? frames : 0;
> + uint rx_frames = rx ? frames : 0;
>
> - return 0;
> + while (tx_frames || rx_frames) {
> + if (tx_frames) {
> + uint tx_diff = dw_writer(priv, tx, tx_idx, tx_frames,
> + rx_frames, frame_bytes);
> +
> + tx_idx += tx_diff;
> + tx_frames -= tx_diff;
> + }
> +
> + if (rx_frames) {
> + uint rx_diff = dw_reader(priv, rx, rx_idx, rx_frames,
> + frame_bytes);
> +
> + rx_idx += rx_diff;
> + rx_frames -= rx_diff;
> + }
> +
> + /*
> + * If we don't read/write fast enough, the transfer stops.
> + * Don't bother reading out what's left in the FIFO; it's
> + * garbage.
> + */
> + if (dw_read(priv, DW_SPI_RISR) & (ISR_RXOI | ISR_TXUI))
> + break;
> + }
> + return min(tx ? tx_idx : rx_idx, rx ? rx_idx : tx_idx);
> }
>
> /*
> @@ -474,19 +544,21 @@ static int dw_spi_xfer(struct udevice *dev, unsigned int bitlen,
> {
> struct udevice *bus = dev->parent;
> struct dw_spi_priv *priv = dev_get_priv(bus);
> - const u8 *tx = dout;
> + const void *tx = dout;
> u8 *rx = din;
> int ret = 0;
> u32 cr0 = 0;
> - u32 val;
> - u32 cs;
> + u32 val, cs;
> + uint frames;
>
> /* spi core configured to do 8 bit transfers */
> - if (bitlen % 8) {
> + if (bitlen % priv->bits_per_word) {
> dev_err(dev, "Non byte aligned SPI transfer.\n");
> return -1;
> }
>
> + frames = bitlen / priv->bits_per_word;
> +
> /* Start the transaction if necessary. */
> if (flags & SPI_XFER_BEGIN)
> external_cs_manage(dev, false);
> @@ -496,30 +568,22 @@ static int dw_spi_xfer(struct udevice *dev, unsigned int bitlen,
> else if (rx)
> priv->tmode = CTRLR0_TMOD_RO;
> else
> - /*
> - * In transmit only mode (CTRL0_TMOD_TO) input FIFO never gets
> - * any data which breaks our logic in poll_transfer() above.
> - */
> - priv->tmode = CTRLR0_TMOD_TR;
> + priv->tmode = CTRLR0_TMOD_TO;
>
> cr0 = dw_spi_update_cr0(priv);
>
> - priv->len = bitlen >> 3;
> -
> - priv->tx = (void *)tx;
> - priv->tx_end = priv->tx + priv->len;
> - priv->rx = rx;
> - priv->rx_end = priv->rx + priv->len;
> -
> /* Disable controller before writing control registers */
> dw_write(priv, DW_SPI_SSIENR, 0);
>
> - dev_dbg(dev, "cr0=%08x rx=%p tx=%p len=%d [bytes]\n", cr0, rx, tx,
> - priv->len);
> + dev_dbg(dev, "cr0=%08x rx=%p tx=%p frames=%d\n", cr0, rx, tx,
> + frames);
> /* Reprogram cr0 only if changed */
> if (dw_read(priv, DW_SPI_CTRLR0) != cr0)
> dw_write(priv, DW_SPI_CTRLR0, cr0);
>
> + if (rx)
> + dw_write(priv, DW_SPI_CTRLR1, frames - 1);
> +
> /*
> * Configure the desired SS (slave select 0...3) in the controller
> * The DW SPI controller will activate and deactivate this CS
> @@ -531,8 +595,15 @@ static int dw_spi_xfer(struct udevice *dev, unsigned int bitlen,
> /* Enable controller after writing control registers */
> dw_write(priv, DW_SPI_SSIENR, 1);
>
> + /*
> + * Prime the pump. RO-mode doesn't work unless something gets written to
> + * the FIFO
> + */
> + if (rx && !tx)
> + dw_write(priv, DW_SPI_DR, 0xFFFFFFFF);
> +
> /* Start transfer in a polling loop */
> - ret = poll_transfer(priv);
> + poll_transfer(priv, tx, rx, frames);
>
> /*
> * Wait for current transmit operation to complete.
> @@ -565,9 +636,10 @@ static int dw_spi_exec_op(struct spi_slave *slave, const struct spi_mem_op *op)
> int pos, i, ret = 0;
> struct udevice *bus = slave->dev->parent;
> struct dw_spi_priv *priv = dev_get_priv(bus);
> + struct spi_mem_op *mut_op = (struct spi_mem_op *)op;
> u8 op_len = sizeof(op->cmd.opcode) + op->addr.nbytes + op->dummy.nbytes;
> u8 op_buf[op_len];
> - u32 cr0;
> + u32 cr0, val;
>
> if (read)
> priv->tmode = CTRLR0_TMOD_EPROMREAD;
> @@ -597,47 +669,33 @@ static int dw_spi_exec_op(struct spi_slave *slave, const struct spi_mem_op *op)
> if (op->dummy.nbytes)
> memset(op_buf + pos, 0xff, op->dummy.nbytes);
>
> - external_cs_manage(slave->dev, false);
> + dw_writer(priv, &op_buf, 0, op_len, 0, sizeof(u8));
>
> - priv->tx = &op_buf;
> - priv->tx_end = priv->tx + op_len;
> - priv->rx = NULL;
> - priv->rx_end = NULL;
> - while (priv->tx != priv->tx_end)
> - dw_writer(priv);
> + external_cs_manage(slave->dev, false);
> + dw_write(priv, DW_SPI_SER, 1 << spi_chip_select(slave->dev));
>
> /*
> * XXX: The following are tight loops! Enabling debug messages may cause
> * them to fail because we are not reading/writing the fifo fast enough.
> */
> - if (read) {
> - priv->rx = op->data.buf.in;
> - priv->rx_end = priv->rx + op->data.nbytes;
> + if (read)
> + mut_op->data.nbytes = poll_transfer(priv, NULL, op->data.buf.in,
> + op->data.nbytes);
> + else
> + mut_op->data.nbytes = poll_transfer(priv, op->data.buf.out,
> + NULL, op->data.nbytes);
>
> - dw_write(priv, DW_SPI_SER, 1 << spi_chip_select(slave->dev));
> - while (priv->rx != priv->rx_end)
> - dw_reader(priv);
> - } else {
> - u32 val;
> -
> - priv->tx = op->data.buf.out;
> - priv->tx_end = priv->tx + op->data.nbytes;
> -
> - /* Fill up the write fifo before starting the transfer */
> - dw_writer(priv);
> - dw_write(priv, DW_SPI_SER, 1 << spi_chip_select(slave->dev));
> - while (priv->tx != priv->tx_end)
> - dw_writer(priv);
> -
> - if (readl_poll_timeout(priv->regs + DW_SPI_SR, val,
> - (val & SR_TF_EMPT) && !(val & SR_BUSY),
> - RX_TIMEOUT * 1000)) {
> - dev_dbg(bus, "timed out; sr=%x\n",
> - dw_read(priv, DW_SPI_SR));
> - ret = -ETIMEDOUT;
> - }
> + /*
> + * Ensure the data (or the instruction for zero-data instructions) has
> + * been transmitted from the fifo/shift register before disabling the
> + * device.
> + */
> + if (readl_poll_timeout(priv->regs + DW_SPI_SR, val,
> + (val & SR_TF_EMPT) && !(val & SR_BUSY),
> + RX_TIMEOUT * 1000)) {
> + dev_dbg(bus, "timed out; sr=%x\n", dw_read(priv, DW_SPI_SR));
> + ret = -ETIMEDOUT;
> }
> -
> dw_write(priv, DW_SPI_SER, 0);
> external_cs_manage(slave->dev, true);
>
>
--
Damien Le Moal
Western Digital Research
More information about the U-Boot
mailing list