[PATCH v2 06/10] spi: dw: Rewrite poll_transfer logic
Sean Anderson
seanga2 at gmail.com
Mon Apr 5 03:11:51 CEST 2021
On 4/4/21 8:47 PM, Damien Le Moal wrote:
> On 2021/04/03 8:05, Sean Anderson wrote:
>> This rewrites poll_transfer, dw_writer, and dw_reader.
>>
>> * We now use RO transfers (instead of always using TR). This eliminates the
>> need to send out dummy words, and simplifies the transmit logic.
>> * All parameters (except regs and bits_per_word) are passed explicitly.
>> * Most parameters have been made explicit (instead of being recalculated on
>> every loop).
>> * Transfers are measured in units of frames instead of bytes. This matches
>> the measurements used by the device and eliminates several divisions by
>> bits_per_word.
>> * We now check if we have over-/under-flowed the FIFO. This should help
>> prevent hangs when the device stops the transfer and U-Boot doesn't
>> realize it (and then waits for the remaining data forever). TXUI is not
>> present in DW_APB_SSI, but in the worst case we just don't write data.
>>
>> Unfortunately, this doesn't seem to solve all problems, and there are
>> some remaining bugs (such as some transfers containing all 1s or all 0s)
>> when we have many fifo overflows. This is solved for DWC devices by
>> enabling clock stretching.
>> * poll_transfer is now used by dw_spi_exec_op as well as xfer.
>> * There are separate inner loops for 8-, 16-, and 32-bit frame sizes. This
>> should hopefully reduce the amount of over-/under-flows. However, I
>> haven't done exhaustive testing to ensure this is really necessary. In
>> particular, I was never able to prevent read overflows at 50MHz clock
>> speed.
>
> That sounds like the long fight I had on Linux side... Did you account for the
> fact that the K210 has a buggy fifo depth (it reports 32 but is in fact 31) ?
Hm, I forgot about that. I think that's a good line of investigation.
--Sean
>
>>
>> These changes should probably have been split up into several commits, but
>> several depend on each other, and it would be difficult to break this up
>> while preserving bisectability.
>>
>> Signed-off-by: Sean Anderson <seanga2 at gmail.com>
>> ---
>>
>> (no changes since v1)
>>
>> drivers/spi/designware_spi.c | 284 +++++++++++++++++++++--------------
>> 1 file changed, 171 insertions(+), 113 deletions(-)
>>
>> diff --git a/drivers/spi/designware_spi.c b/drivers/spi/designware_spi.c
>> index 6375e6d778..72cca14887 100644
>> --- a/drivers/spi/designware_spi.c
>> +++ b/drivers/spi/designware_spi.c
>> @@ -134,14 +134,10 @@ struct dw_spi_priv {
>> unsigned int freq; /* Default frequency */
>> unsigned int mode;
>>
>> - const void *tx;
>> - const void *tx_end;
>> - void *rx;
>> - void *rx_end;
>> u32 fifo_len; /* depth of the FIFO buffer */
>>
>> int bits_per_word;
>> - int len;
>> + int frames; /* Number of frames in the transfer */
>> u8 cs; /* chip select pin */
>> u8 tmode; /* TR/TO/RO/EEPROM */
>> u8 type; /* SPI/SSP/MicroWire */
>> @@ -372,14 +368,24 @@ static int dw_spi_probe(struct udevice *bus)
>> return 0;
>> }
>>
>> -/* Return the max entries we can fill into tx fifo */
>> -static inline u32 tx_max(struct dw_spi_priv *priv)
>> +/**
>> + * dw_writer() - Write data frames to the tx fifo
>> + * @priv: Driver private info
>> + * @tx: The tx buffer
>> + * @idx: The number of data frames already transmitted
>> + * @tx_frames: The number of data frames left to transmit
>> + * @rx_frames: The number of data frames left to receive (0 if only
>> + * transmitting)
>> + * @frame_bytes: The number of bytes taken up by one data frame
>> + *
>> + * This function writes up to @tx_frames data frames using data from @tx[@idx].
>> + *
>> + * Return: The number of frames read
>> + */
>> +static uint dw_writer(struct dw_spi_priv *priv, const void *tx, uint idx,
>> + uint tx_frames, uint rx_frames, uint frame_bytes)
>> {
>> - u32 tx_left, tx_room, rxtx_gap;
>> -
>> - tx_left = (priv->tx_end - priv->tx) / (priv->bits_per_word >> 3);
>> - tx_room = priv->fifo_len - dw_read(priv, DW_SPI_TXFLR);
>> -
>> + u32 tx_room = priv->fifo_len - dw_read(priv, DW_SPI_TXFLR);
>> /*
>> * Another concern is about the tx/rx mismatch, we
>> * thought about using (priv->fifo_len - rxflr - txflr) as
>> @@ -388,67 +394,131 @@ static inline u32 tx_max(struct dw_spi_priv *priv)
>> * shift registers. So a control from sw point of
>> * view is taken.
>> */
>> - rxtx_gap = ((priv->rx_end - priv->rx) - (priv->tx_end - priv->tx)) /
>> - (priv->bits_per_word >> 3);
>> + u32 rxtx_gap = rx_frames - tx_frames;
>> + u32 count = min3(tx_frames, tx_room, (u32)(priv->fifo_len - rxtx_gap));
>> + u32 *dr = priv->regs + DW_SPI_DR;
>>
>> - return min3(tx_left, tx_room, (u32)(priv->fifo_len - rxtx_gap));
>> -}
>> + if (!count)
>> + return 0;
>>
>> -/* Return the max entries we should read out of rx fifo */
>> -static inline u32 rx_max(struct dw_spi_priv *priv)
>> -{
>> - u32 rx_left = (priv->rx_end - priv->rx) / (priv->bits_per_word >> 3);
>> -
>> - return min_t(u32, rx_left, dw_read(priv, DW_SPI_RXFLR));
>> -}
>> -
>> -static void dw_writer(struct dw_spi_priv *priv)
>> -{
>> - u32 max = tx_max(priv);
>> - u32 txw = 0xFFFFFFFF;
>> -
>> - while (max--) {
>> - /* Set the tx word if the transfer's original "tx" is not null */
>> - if (priv->tx_end - priv->len) {
>> - if (priv->bits_per_word == 8)
>> - txw = *(u8 *)(priv->tx);
>> - else
>> - txw = *(u16 *)(priv->tx);
>> - }
>> - dw_write(priv, DW_SPI_DR, txw);
>> - log_content("tx=0x%02x\n", txw);
>> - priv->tx += priv->bits_per_word >> 3;
>> +#define do_write(type) do { \
>> + type *start = ((type *)tx) + idx; \
>> + type *end = start + count; \
>> + do { \
>> + __raw_writel(*start++, dr); \
>> + } while (start < end); \
>> +} while (0)
>> + switch (frame_bytes) {
>> + case 1:
>> + do_write(u8);
>> + break;
>> + case 2:
>> + do_write(u16);
>> + break;
>> + case 3:
>> + case 4:
>> + default:
>> + do_write(u32);
>> + break;
>> }
>> +#undef do_write
>> +
>> + return count;
>> }
>>
>> -static void dw_reader(struct dw_spi_priv *priv)
>> +/**
>> + * dw_reader() - Read data frames from the rx fifo
>> + * @priv: Driver private data
>> + * @rx: The rx buffer
>> + * @idx: The number of data frames already received
>> + * @frames: The number of data frames left to receive
>> + * @frame_bytes: The size of a data frame in bytes
>> + *
>> + * This function reads up to @frames data frames from @rx[@idx].
>> + *
>> + * Return: The number of frames read
>> + */
>> +static uint dw_reader(struct dw_spi_priv *priv, void *rx, uint idx, uint frames,
>> + uint frame_bytes)
>> {
>> - u32 max = rx_max(priv);
>> - u16 rxw;
>> + u32 rx_lvl = dw_read(priv, DW_SPI_RXFLR);
>> + u32 count = min(frames, rx_lvl);
>> + u32 *dr = priv->regs + DW_SPI_DR;
>>
>> - while (max--) {
>> - rxw = dw_read(priv, DW_SPI_DR);
>> - log_content("rx=0x%02x\n", rxw);
>> + if (!count)
>> + return 0;
>>
>> - /* Care about rx if the transfer's original "rx" is not null */
>> - if (priv->rx_end - priv->len) {
>> - if (priv->bits_per_word == 8)
>> - *(u8 *)(priv->rx) = rxw;
>> - else
>> - *(u16 *)(priv->rx) = rxw;
>> - }
>> - priv->rx += priv->bits_per_word >> 3;
>> +#define do_read(type) do { \
>> + type *start = ((type *)rx) + idx; \
>> + type *end = start + count; \
>> + do { \
>> + *start++ = __raw_readl(dr); \
>> + } while (start < end); \
>> +} while (0)
>> + switch (frame_bytes) {
>> + case 1:
>> + do_read(u8);
>> + break;
>> + case 2:
>> + do_read(u16);
>> + break;
>> + case 3:
>> + case 4:
>> + default:
>> + do_read(u32);
>> + break;
>> }
>> +#undef do_read
>> +
>> + return count;
>> }
>>
>> -static int poll_transfer(struct dw_spi_priv *priv)
>> +/**
>> + * poll_transfer() - Transmit and receive data frames
>> + * @priv: Driver private data
>> + * @tx: The tx buffer. May be %NULL to only receive.
>> + * @rx: The rx buffer. May be %NULL to discard read data.
>> + * @frames: The number of data frames to transfer
>> + *
>> + * Transmit @tx, while recieving @rx.
>> + *
>> + * Return: The lesser of the number of frames transmitted or received.
>> + */
>> +static uint poll_transfer(struct dw_spi_priv *priv, const void *tx, void *rx,
>> + uint frames)
>> {
>> - do {
>> - dw_writer(priv);
>> - dw_reader(priv);
>> - } while (priv->rx_end > priv->rx);
>> + uint frame_bytes = priv->bits_per_word >> 3;
>> + uint tx_idx = 0;
>> + uint rx_idx = 0;
>> + uint tx_frames = tx ? frames : 0;
>> + uint rx_frames = rx ? frames : 0;
>>
>> - return 0;
>> + while (tx_frames || rx_frames) {
>> + if (tx_frames) {
>> + uint tx_diff = dw_writer(priv, tx, tx_idx, tx_frames,
>> + rx_frames, frame_bytes);
>> +
>> + tx_idx += tx_diff;
>> + tx_frames -= tx_diff;
>> + }
>> +
>> + if (rx_frames) {
>> + uint rx_diff = dw_reader(priv, rx, rx_idx, rx_frames,
>> + frame_bytes);
>> +
>> + rx_idx += rx_diff;
>> + rx_frames -= rx_diff;
>> + }
>> +
>> + /*
>> + * If we don't read/write fast enough, the transfer stops.
>> + * Don't bother reading out what's left in the FIFO; it's
>> + * garbage.
>> + */
>> + if (dw_read(priv, DW_SPI_RISR) & (ISR_RXOI | ISR_TXUI))
>> + break;
>> + }
>> + return min(tx ? tx_idx : rx_idx, rx ? rx_idx : tx_idx);
>> }
>>
>> /*
>> @@ -474,19 +544,21 @@ static int dw_spi_xfer(struct udevice *dev, unsigned int bitlen,
>> {
>> struct udevice *bus = dev->parent;
>> struct dw_spi_priv *priv = dev_get_priv(bus);
>> - const u8 *tx = dout;
>> + const void *tx = dout;
>> u8 *rx = din;
>> int ret = 0;
>> u32 cr0 = 0;
>> - u32 val;
>> - u32 cs;
>> + u32 val, cs;
>> + uint frames;
>>
>> /* spi core configured to do 8 bit transfers */
>> - if (bitlen % 8) {
>> + if (bitlen % priv->bits_per_word) {
>> dev_err(dev, "Non byte aligned SPI transfer.\n");
>> return -1;
>> }
>>
>> + frames = bitlen / priv->bits_per_word;
>> +
>> /* Start the transaction if necessary. */
>> if (flags & SPI_XFER_BEGIN)
>> external_cs_manage(dev, false);
>> @@ -496,30 +568,22 @@ static int dw_spi_xfer(struct udevice *dev, unsigned int bitlen,
>> else if (rx)
>> priv->tmode = CTRLR0_TMOD_RO;
>> else
>> - /*
>> - * In transmit only mode (CTRL0_TMOD_TO) input FIFO never gets
>> - * any data which breaks our logic in poll_transfer() above.
>> - */
>> - priv->tmode = CTRLR0_TMOD_TR;
>> + priv->tmode = CTRLR0_TMOD_TO;
>>
>> cr0 = dw_spi_update_cr0(priv);
>>
>> - priv->len = bitlen >> 3;
>> -
>> - priv->tx = (void *)tx;
>> - priv->tx_end = priv->tx + priv->len;
>> - priv->rx = rx;
>> - priv->rx_end = priv->rx + priv->len;
>> -
>> /* Disable controller before writing control registers */
>> dw_write(priv, DW_SPI_SSIENR, 0);
>>
>> - dev_dbg(dev, "cr0=%08x rx=%p tx=%p len=%d [bytes]\n", cr0, rx, tx,
>> - priv->len);
>> + dev_dbg(dev, "cr0=%08x rx=%p tx=%p frames=%d\n", cr0, rx, tx,
>> + frames);
>> /* Reprogram cr0 only if changed */
>> if (dw_read(priv, DW_SPI_CTRLR0) != cr0)
>> dw_write(priv, DW_SPI_CTRLR0, cr0);
>>
>> + if (rx)
>> + dw_write(priv, DW_SPI_CTRLR1, frames - 1);
>> +
>> /*
>> * Configure the desired SS (slave select 0...3) in the controller
>> * The DW SPI controller will activate and deactivate this CS
>> @@ -531,8 +595,15 @@ static int dw_spi_xfer(struct udevice *dev, unsigned int bitlen,
>> /* Enable controller after writing control registers */
>> dw_write(priv, DW_SPI_SSIENR, 1);
>>
>> + /*
>> + * Prime the pump. RO-mode doesn't work unless something gets written to
>> + * the FIFO
>> + */
>> + if (rx && !tx)
>> + dw_write(priv, DW_SPI_DR, 0xFFFFFFFF);
>> +
>> /* Start transfer in a polling loop */
>> - ret = poll_transfer(priv);
>> + poll_transfer(priv, tx, rx, frames);
>>
>> /*
>> * Wait for current transmit operation to complete.
>> @@ -565,9 +636,10 @@ static int dw_spi_exec_op(struct spi_slave *slave, const struct spi_mem_op *op)
>> int pos, i, ret = 0;
>> struct udevice *bus = slave->dev->parent;
>> struct dw_spi_priv *priv = dev_get_priv(bus);
>> + struct spi_mem_op *mut_op = (struct spi_mem_op *)op;
>> u8 op_len = sizeof(op->cmd.opcode) + op->addr.nbytes + op->dummy.nbytes;
>> u8 op_buf[op_len];
>> - u32 cr0;
>> + u32 cr0, val;
>>
>> if (read)
>> priv->tmode = CTRLR0_TMOD_EPROMREAD;
>> @@ -597,47 +669,33 @@ static int dw_spi_exec_op(struct spi_slave *slave, const struct spi_mem_op *op)
>> if (op->dummy.nbytes)
>> memset(op_buf + pos, 0xff, op->dummy.nbytes);
>>
>> - external_cs_manage(slave->dev, false);
>> + dw_writer(priv, &op_buf, 0, op_len, 0, sizeof(u8));
>>
>> - priv->tx = &op_buf;
>> - priv->tx_end = priv->tx + op_len;
>> - priv->rx = NULL;
>> - priv->rx_end = NULL;
>> - while (priv->tx != priv->tx_end)
>> - dw_writer(priv);
>> + external_cs_manage(slave->dev, false);
>> + dw_write(priv, DW_SPI_SER, 1 << spi_chip_select(slave->dev));
>>
>> /*
>> * XXX: The following are tight loops! Enabling debug messages may cause
>> * them to fail because we are not reading/writing the fifo fast enough.
>> */
>> - if (read) {
>> - priv->rx = op->data.buf.in;
>> - priv->rx_end = priv->rx + op->data.nbytes;
>> + if (read)
>> + mut_op->data.nbytes = poll_transfer(priv, NULL, op->data.buf.in,
>> + op->data.nbytes);
>> + else
>> + mut_op->data.nbytes = poll_transfer(priv, op->data.buf.out,
>> + NULL, op->data.nbytes);
>>
>> - dw_write(priv, DW_SPI_SER, 1 << spi_chip_select(slave->dev));
>> - while (priv->rx != priv->rx_end)
>> - dw_reader(priv);
>> - } else {
>> - u32 val;
>> -
>> - priv->tx = op->data.buf.out;
>> - priv->tx_end = priv->tx + op->data.nbytes;
>> -
>> - /* Fill up the write fifo before starting the transfer */
>> - dw_writer(priv);
>> - dw_write(priv, DW_SPI_SER, 1 << spi_chip_select(slave->dev));
>> - while (priv->tx != priv->tx_end)
>> - dw_writer(priv);
>> -
>> - if (readl_poll_timeout(priv->regs + DW_SPI_SR, val,
>> - (val & SR_TF_EMPT) && !(val & SR_BUSY),
>> - RX_TIMEOUT * 1000)) {
>> - dev_dbg(bus, "timed out; sr=%x\n",
>> - dw_read(priv, DW_SPI_SR));
>> - ret = -ETIMEDOUT;
>> - }
>> + /*
>> + * Ensure the data (or the instruction for zero-data instructions) has
>> + * been transmitted from the fifo/shift register before disabling the
>> + * device.
>> + */
>> + if (readl_poll_timeout(priv->regs + DW_SPI_SR, val,
>> + (val & SR_TF_EMPT) && !(val & SR_BUSY),
>> + RX_TIMEOUT * 1000)) {
>> + dev_dbg(bus, "timed out; sr=%x\n", dw_read(priv, DW_SPI_SR));
>> + ret = -ETIMEDOUT;
>> }
>> -
>> dw_write(priv, DW_SPI_SER, 0);
>> external_cs_manage(slave->dev, true);
>>
>>
>
>
More information about the U-Boot
mailing list