[PATCH v3 3/3] drivers/spi/airoha_snfi_spi: add dma support
Mikhail Kshevetskiy
mikhail.kshevetskiy at iopsys.eu
Fri Jun 20 01:54:48 CEST 2025
This patch speeds up cache reading/writing/updating operations.
It was tested on en7323/an7581 and some other Airoha chips.
Unlike the current design, it does not use any knowledge of the spi-nand
page/oob size. Instead, it uses only the information from the spi-mem
request. This speeds up:
* page reading/writing without oob
* page reading/writing with oob
* oob reading/writing (significant for UBI scanning)
The only known issue appears under very specific conditions on en7523 family
chips.
Signed-off-by: Mikhail Kshevetskiy <mikhail.kshevetskiy at iopsys.eu>
---
drivers/spi/airoha_snfi_spi.c | 526 ++++++++++++++++++++++++++++++++++
1 file changed, 526 insertions(+)
diff --git a/drivers/spi/airoha_snfi_spi.c b/drivers/spi/airoha_snfi_spi.c
index 363590231e2..9f6434ef177 100644
--- a/drivers/spi/airoha_snfi_spi.c
+++ b/drivers/spi/airoha_snfi_spi.c
@@ -119,6 +119,7 @@
#define SPI_NFI_AHB_DONE BIT(6)
#define REG_SPI_NFI_CMD 0x0020
+#define SPI_NFI_CMD_WRITE BIT(7)
#define REG_SPI_NFI_ADDR_NOB 0x0030
#define SPI_NFI_ROW_ADDR_NOB GENMASK(6, 4)
@@ -141,12 +142,17 @@
#define SPI_NFI_CUS_SEC_SIZE_EN BIT(16)
#define REG_SPI_NFI_RD_CTL2 0x0510
+#define SPI_NFI_DATA_READ_CMD GENMASK(7, 0)
+
#define REG_SPI_NFI_RD_CTL3 0x0514
+#define SPI_NFI_DATA_READ_ADDRESS GENMASK(12, 0)
#define REG_SPI_NFI_PG_CTL1 0x0524
#define SPI_NFI_PG_LOAD_CMD GENMASK(15, 8)
#define REG_SPI_NFI_PG_CTL2 0x0528
+#define SPI_NFI_PG_LOAD_ADDR GENMASK(12, 0)
+
#define REG_SPI_NFI_NOR_PROG_ADDR 0x052c
#define REG_SPI_NFI_NOR_RD_ADDR 0x0534
@@ -173,7 +179,9 @@
#define SPI_NAND_OP_READ_FROM_CACHE_SINGLE 0x03
#define SPI_NAND_OP_READ_FROM_CACHE_SINGLE_FAST 0x0b
#define SPI_NAND_OP_READ_FROM_CACHE_DUAL 0x3b
+#define SPI_NAND_OP_READ_FROM_CACHE_DUALIO 0xbb
#define SPI_NAND_OP_READ_FROM_CACHE_QUAD 0x6b
+#define SPI_NAND_OP_READ_FROM_CACHE_QUADIO 0xeb
#define SPI_NAND_OP_WRITE_ENABLE 0x06
#define SPI_NAND_OP_WRITE_DISABLE 0x04
#define SPI_NAND_OP_PROGRAM_LOAD_SINGLE 0x02
@@ -189,6 +197,8 @@
#define SPI_NAND_CACHE_SIZE (SZ_4K + SZ_256)
#define SPI_MAX_TRANSFER_SIZE 511
+#define NFI_CHECK_MAX_TIMES 1000000
+
enum airoha_snand_mode {
SPI_MODE_AUTO,
SPI_MODE_MANUAL,
@@ -212,11 +222,19 @@ enum airoha_snand_rx_buswidth {
SPI_RX_BUSWIDTH_QUAD = 0x0f,
};
+enum airoha_snand_nfi_buswidth {
+ SPI_NFI_BUSWIDTH_SINGLE = 0,
+ SPI_NFI_BUSWIDTH_DUAL = 1,
+ SPI_NFI_BUSWIDTH_QUAD = 2,
+};
+
struct airoha_snand_priv {
struct regmap *regmap_ctrl;
struct regmap *regmap_nfi;
struct clk *spi_clk;
+ int dma;
+
struct {
size_t page_size;
size_t sec_size;
@@ -567,6 +585,485 @@ static int airoha_snand_nfi_config(struct airoha_snand_priv *priv)
SPI_NFI_CUS_SEC_SIZE, val);
}
+static int airoha_snand_dma_on(struct airoha_snand_priv *priv)
+{
+ int err;
+
+ err = airoha_snand_set_mode(priv, SPI_MODE_DMA);
+ if (err)
+ return err;
+
+ err = regmap_write(priv->regmap_ctrl,
+ REG_SPI_CTRL_READ_IDLE_EN, 1);
+ if (err)
+ return err;
+
+ err = regmap_write(priv->regmap_ctrl, REG_SPI_CTRL_DUMMY, 0);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int airoha_snand_dma_off(struct airoha_snand_priv *priv)
+{
+ int err;
+
+ err = airoha_snand_set_mode(priv, SPI_MODE_MANUAL);
+ if (err)
+ return err;
+
+ err = regmap_write(priv->regmap_ctrl, REG_SPI_CTRL_DUMMY, 0);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int airoha_snand_nfi_read_cache(struct airoha_snand_priv *priv,
+ enum airoha_snand_nfi_buswidth buswidth,
+ u32 opcode, u16 column, u32 len,
+ dma_addr_t addr)
+{
+ int i, err;
+ u32 val;
+
+ /* NFI init */
+ err = regmap_update_bits(priv->regmap_nfi, REG_SPI_NFI_INTR_EN,
+ SPI_NFI_ALL_IRQ_EN, SPI_NFI_AHB_DONE_EN);
+ if (err)
+ return err;
+
+ /* NFI reset */
+ err = regmap_write(priv->regmap_nfi, REG_SPI_NFI_CON,
+ SPI_NFI_FIFO_FLUSH | SPI_NFI_RST);
+ if (err)
+ return err;
+
+ /*
+ * NFI configure:
+ * - No AutoFMT (custom sector size (SECCUS) register will be used)
+ * - No SoC's hardware ECC (flash internal ECC will be used)
+ * - Use burst mode (faster, but requires 16 byte alignment for addresses)
+ * - Setup FSM operating process flow (NFI_CNFG_OP_READ)
+ * - Use DMA instead of PIO for data reading (NFI_CNFG_READ_EN is set)
+ * - Use AHB bus for DMA transfer
+ */
+ err = regmap_update_bits(priv->regmap_nfi, REG_SPI_NFI_CNFG,
+ SPI_NFI_DMA_MODE |
+ SPI_NFI_READ_MODE |
+ SPI_NFI_DMA_BURST_EN |
+ SPI_NFI_HW_ECC_EN |
+ SPI_NFI_AUTO_FDM_EN |
+ SPI_NFI_OPMODE,
+ SPI_NFI_DMA_MODE |
+ SPI_NFI_READ_MODE |
+ SPI_NFI_DMA_BURST_EN |
+ FIELD_PREP(SPI_NFI_OPMODE, 6));
+ if (err)
+ return err;
+
+ /* Configure NFI for reading */
+ err = regmap_write(priv->regmap_nfi, REG_SPI_NFI_CMD, 0);
+ if (err)
+ return err;
+
+ /*
+ * Setup transfer length:
+ * - Enable custom sector size feature (NFI_SECCUS register)
+ * - Set custom sector size (NFI_SECCUS register)
+ * - Set number of sector will be read (NFI_CON register)
+ * - Set read data byte number (NFI_SNF_MISC_CTL2 register)
+ *
+ * The following rule MUST be met:
+ * transfer_length =
+ * = NFI_SNF_MISC_CTL2.read_data_byte_number =
+ * = NFI_CON.sector_number * NFI_SECCUS.custom_sector_size
+ *
+ * We will use the following settings:
+ * NFI_CON.sector_number = 1,
+ * NFI_SECCUS.custom_sector_size = transfer_length,
+ * NFI_SNF_MISC_CTL2.read_data_byte_number = transfer_length
+ */
+ err = regmap_update_bits(priv->regmap_nfi, REG_SPI_NFI_CON,
+ SPI_NFI_SEC_NUM,
+ FIELD_PREP(SPI_NFI_SEC_NUM, 1));
+ if (err)
+ return err;
+
+ err = regmap_update_bits(priv->regmap_nfi, REG_SPI_NFI_SECCUS_SIZE,
+ SPI_NFI_CUS_SEC_SIZE |
+ SPI_NFI_CUS_SEC_SIZE_EN,
+ FIELD_PREP(SPI_NFI_CUS_SEC_SIZE, len) |
+ SPI_NFI_CUS_SEC_SIZE_EN);
+ if (err)
+ return err;
+
+ err = regmap_update_bits(priv->regmap_nfi, REG_SPI_NFI_SNF_MISC_CTL2,
+ SPI_NFI_READ_DATA_BYTE_NUM,
+ FIELD_PREP(SPI_NFI_READ_DATA_BYTE_NUM, len));
+ if (err)
+ return err;
+
+ err = regmap_write(priv->regmap_nfi, REG_SPI_NFI_STRADDR, addr);
+ if (err)
+ return err;
+
+ err = regmap_write(priv->regmap_nfi, REG_SPI_NFI_RD_CTL2,
+ FIELD_PREP(SPI_NFI_DATA_READ_CMD, opcode));
+ if (err)
+ return err;
+
+ err = regmap_write(priv->regmap_nfi, REG_SPI_NFI_RD_CTL3,
+ FIELD_PREP(SPI_NFI_DATA_READ_ADDRESS, column));
+ if (err)
+ return err;
+
+ err = regmap_write(priv->regmap_nfi, REG_SPI_NFI_SNF_MISC_CTL,
+ FIELD_PREP(SPI_NFI_DATA_READ_WR_MODE, buswidth));
+ if (err)
+ return err;
+
+ /* trigger DMA read operation */
+ err = regmap_clear_bits(priv->regmap_nfi, REG_SPI_NFI_CON,
+ SPI_NFI_RD_TRIG);
+ if (err)
+ return err;
+
+ err = regmap_set_bits(priv->regmap_nfi, REG_SPI_NFI_CON,
+ SPI_NFI_RD_TRIG);
+ if (err)
+ return err;
+
+ /* is reading finished ? */
+ for (i = 0; i < NFI_CHECK_MAX_TIMES; i++) {
+ err = regmap_read(priv->regmap_nfi,
+ REG_SPI_NFI_SNF_STA_CTL1, &val);
+ if (err)
+ return err;
+
+ if (val & SPI_NFI_READ_FROM_CACHE_DONE) {
+ err = regmap_set_bits(priv->regmap_nfi,
+ REG_SPI_NFI_SNF_STA_CTL1,
+ SPI_NFI_READ_FROM_CACHE_DONE);
+ if (err)
+ return err;
+
+ break;
+ }
+ }
+
+ if (i == NFI_CHECK_MAX_TIMES)
+ return -ETIMEDOUT;
+
+ /* is DMA transfer completed ? */
+ for (i = 0; i < NFI_CHECK_MAX_TIMES; i++) {
+ err = regmap_read(priv->regmap_nfi,
+ REG_SPI_NFI_INTR, &val);
+ if (err)
+ return err;
+
+ if (val & SPI_NFI_AHB_DONE)
+ break;
+ }
+
+ if (i == NFI_CHECK_MAX_TIMES)
+ return -ETIMEDOUT;
+
+ udelay(1);
+
+ return 0;
+}
+
+static int airoha_snand_nfi_prog_cache(struct airoha_snand_priv *priv,
+ enum airoha_snand_nfi_buswidth buswidth,
+ u32 opcode, u16 column, u32 len,
+ dma_addr_t addr)
+{
+ int i, err;
+ u32 val;
+
+ /* NFI init */
+ err = regmap_update_bits(priv->regmap_nfi, REG_SPI_NFI_INTR_EN,
+ SPI_NFI_ALL_IRQ_EN, SPI_NFI_AHB_DONE_EN);
+ if (err)
+ return err;
+
+ /* NFI reset */
+ err = regmap_write(priv->regmap_nfi, REG_SPI_NFI_CON,
+ SPI_NFI_FIFO_FLUSH | SPI_NFI_RST);
+ if (err)
+ return err;
+
+ /*
+ * NFI configure:
+ * - No AutoFMT (custom sector size (SECCUS) register will be used)
+ * - No SoC's hardware ECC (flash internal ECC will be used)
+ * - Use burst mode (faster, but requires 16 byte alignment for addresses)
+ * - Setup FSM operating process flow (NFI_CNFG_OP_PRGM)
+ * - Use DMA instead of PIO for data writing (NFI_CNFG_READ_EN is cleared)
+ * - Use AHB bus for DMA transfer
+ */
+ err = regmap_update_bits(priv->regmap_nfi, REG_SPI_NFI_CNFG,
+ SPI_NFI_DMA_MODE |
+ SPI_NFI_READ_MODE |
+ SPI_NFI_DMA_BURST_EN |
+ SPI_NFI_HW_ECC_EN |
+ SPI_NFI_AUTO_FDM_EN |
+ SPI_NFI_OPMODE,
+ SPI_NFI_DMA_MODE |
+ SPI_NFI_DMA_BURST_EN |
+ FIELD_PREP(SPI_NFI_OPMODE, 3));
+ if (err)
+ return err;
+
+ /* Configure NFI for writing */
+ err = regmap_write(priv->regmap_nfi, REG_SPI_NFI_CMD,
+ SPI_NFI_CMD_WRITE);
+ if (err)
+ return err;
+
+ /*
+ * Setup transfer length:
+ * - Enable custom sector size feature (NFI_SECCUS register)
+ * - Set custom sector size (NFI_SECCUS register)
+ * - Set number of sector will be written (NFI_CON register)
+ * - Set write data byte number (NFI_SNF_MISC_CTL2 register)
+ *
+ * The following rule MUST be met:
+ * transfer_length =
+ * = NFI_SNF_MISC_CTL2.write_data_byte_number =
+ * = NFI_CON.sector_number * NFI_SECCUS.custom_sector_size
+ *
+ * We will use the following settings:
+ * NFI_CON.sector_number = 1,
+ * NFI_SECCUS.custom_sector_size = transfer_length,
+ * NFI_SNF_MISC_CTL2.write_data_byte_number = transfer_length
+ */
+ err = regmap_update_bits(priv->regmap_nfi, REG_SPI_NFI_CON,
+ SPI_NFI_SEC_NUM,
+ FIELD_PREP(SPI_NFI_SEC_NUM, 1));
+ if (err)
+ return err;
+
+ err = regmap_update_bits(priv->regmap_nfi, REG_SPI_NFI_SECCUS_SIZE,
+ SPI_NFI_CUS_SEC_SIZE |
+ SPI_NFI_CUS_SEC_SIZE_EN,
+ FIELD_PREP(SPI_NFI_CUS_SEC_SIZE, len) |
+ SPI_NFI_CUS_SEC_SIZE_EN);
+ if (err)
+ return err;
+
+ err = regmap_update_bits(priv->regmap_nfi, REG_SPI_NFI_SNF_MISC_CTL2,
+ SPI_NFI_PROG_LOAD_BYTE_NUM,
+ FIELD_PREP(SPI_NFI_PROG_LOAD_BYTE_NUM, len));
+ if (err)
+ return err;
+
+ err = regmap_write(priv->regmap_nfi, REG_SPI_NFI_STRADDR, addr);
+ if (err)
+ return err;
+
+ err = regmap_write(priv->regmap_nfi, REG_SPI_NFI_PG_CTL1,
+ FIELD_PREP(SPI_NFI_PG_LOAD_CMD, opcode));
+ if (err)
+ return err;
+
+ err = regmap_write(priv->regmap_nfi, REG_SPI_NFI_PG_CTL2,
+ FIELD_PREP(SPI_NFI_PG_LOAD_ADDR, column));
+ if (err)
+ return err;
+
+ err = regmap_write(priv->regmap_nfi, REG_SPI_NFI_SNF_MISC_CTL,
+ FIELD_PREP(SPI_NFI_DATA_READ_WR_MODE, buswidth));
+ if (err)
+ return err;
+
+ /* trigger DMA writing operation */
+ err = regmap_clear_bits(priv->regmap_nfi, REG_SPI_NFI_CON,
+ SPI_NFI_WR_TRIG);
+ if (err)
+ return err;
+
+ err = regmap_set_bits(priv->regmap_nfi, REG_SPI_NFI_CON,
+ SPI_NFI_WR_TRIG);
+ if (err)
+ return err;
+
+ udelay(1);
+
+ /* is DMA transfer completed ? */
+ for (i = 0; i < NFI_CHECK_MAX_TIMES; i++) {
+ err = regmap_read(priv->regmap_nfi,
+ REG_SPI_NFI_INTR, &val);
+ if (err)
+ return err;
+
+ if (val & SPI_NFI_AHB_DONE)
+ break;
+ }
+
+ if (i == NFI_CHECK_MAX_TIMES)
+ return -ETIMEDOUT;
+
+ /* is cache writing finished ? */
+ for (i = 0; i < NFI_CHECK_MAX_TIMES; i++) {
+ err = regmap_read(priv->regmap_nfi,
+ REG_SPI_NFI_SNF_STA_CTL1, &val);
+ if (err)
+ return err;
+
+ if (val & SPI_NFI_LOAD_TO_CACHE_DONE) {
+ err = regmap_set_bits(priv->regmap_nfi,
+ REG_SPI_NFI_SNF_STA_CTL1,
+ SPI_NFI_LOAD_TO_CACHE_DONE);
+ if (err)
+ return err;
+
+ break;
+ }
+ }
+
+ if (i == NFI_CHECK_MAX_TIMES)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static int airoha_snand_read_cache_dma(struct airoha_snand_priv *priv,
+ const struct spi_mem_op *op,
+ char *dma_aligned_buf)
+{
+ int err;
+ u32 mask, opcode;
+ dma_addr_t addr;
+ enum airoha_snand_nfi_buswidth nfi_buswidth;
+
+ mask = ARCH_DMA_MINALIGN - 1;
+ if (((op->addr.val & mask) != 0) ||
+ ((op->data.nbytes & mask) != 0) ||
+ (((uintptr_t)dma_aligned_buf & mask) != 0)) {
+ /* Not properly aligned */
+ return -EOPNOTSUPP;
+ }
+
+ if (op->addr.nbytes != 2) {
+ /*
+ * flashes like gigadevice GD5F4GQ4RC
+ * and similar is not supported
+ */
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * DUALIO and QUADIO opcodes are not supported by DMA,
+ * replace them with supported opcodes. Figure out
+ * required buswidth.
+ */
+ switch (op->cmd.opcode) {
+ case SPI_NAND_OP_READ_FROM_CACHE_SINGLE:
+ case SPI_NAND_OP_READ_FROM_CACHE_SINGLE_FAST:
+ opcode = op->cmd.opcode;
+ nfi_buswidth = SPI_NFI_BUSWIDTH_SINGLE;
+ break;
+ case SPI_NAND_OP_READ_FROM_CACHE_DUAL:
+ case SPI_NAND_OP_READ_FROM_CACHE_DUALIO:
+ opcode = SPI_NAND_OP_READ_FROM_CACHE_DUAL;
+ nfi_buswidth = SPI_NFI_BUSWIDTH_DUAL;
+ break;
+ case SPI_NAND_OP_READ_FROM_CACHE_QUAD:
+ case SPI_NAND_OP_READ_FROM_CACHE_QUADIO:
+ opcode = SPI_NAND_OP_READ_FROM_CACHE_QUAD;
+ nfi_buswidth = SPI_NFI_BUSWIDTH_QUAD;
+ break;
+ default:
+ /* unknown opcode */
+ return -EOPNOTSUPP;
+ }
+
+ addr = dma_map_single(dma_aligned_buf, op->data.nbytes,
+ DMA_FROM_DEVICE);
+
+ err = airoha_snand_dma_on(priv);
+ if (err)
+ return err;
+
+ err = airoha_snand_nfi_read_cache(priv, nfi_buswidth,
+ opcode, op->addr.val,
+ op->data.nbytes, addr);
+ if (err)
+ return err;
+
+ err = airoha_snand_dma_off(priv);
+ if (err)
+ return err;
+
+ dma_unmap_single(addr, op->data.nbytes, DMA_FROM_DEVICE);
+
+ return 0;
+}
+
+static int airoha_snand_write_cache_dma(struct airoha_snand_priv *priv,
+ const struct spi_mem_op *op,
+ const char *dma_aligned_buf)
+{
+ u32 mask;
+ int err;
+ dma_addr_t addr;
+ enum airoha_snand_nfi_buswidth nfi_buswidth;
+
+ mask = ARCH_DMA_MINALIGN - 1;
+ if (((op->addr.val & mask) != 0) ||
+ ((op->data.nbytes & mask) != 0) ||
+ (((uintptr_t)dma_aligned_buf & mask) != 0)) {
+ /* Not properly aligned */
+ return -EOPNOTSUPP;
+ }
+
+ if (op->addr.nbytes != 2) {
+ /*
+ * flashes like gigadevice GD5F4GQ4RC
+ * and similar is not supported
+ */
+ return -EOPNOTSUPP;
+ }
+
+ switch (op->cmd.opcode) {
+ case SPI_NAND_OP_PROGRAM_LOAD_SINGLE:
+ case SPI_NAND_OP_PROGRAM_LOAD_RAMDOM_SINGLE:
+ nfi_buswidth = SPI_NFI_BUSWIDTH_SINGLE;
+ break;
+ case SPI_NAND_OP_PROGRAM_LOAD_QUAD:
+ case SPI_NAND_OP_PROGRAM_LOAD_RAMDON_QUAD:
+ nfi_buswidth = SPI_NFI_BUSWIDTH_QUAD;
+ break;
+ default:
+ /* unknown opcode */
+ return -EOPNOTSUPP;
+ }
+
+ addr = dma_map_single((void *)dma_aligned_buf, op->data.nbytes,
+ DMA_TO_DEVICE);
+
+ err = airoha_snand_dma_on(priv);
+ if (err)
+ return err;
+
+ err = airoha_snand_nfi_prog_cache(priv, nfi_buswidth,
+ op->cmd.opcode, op->addr.val,
+ op->data.nbytes, addr);
+ if (err)
+ return err;
+
+ err = airoha_snand_dma_off(priv);
+ if (err)
+ return err;
+
+ dma_unmap_single(addr, op->data.nbytes, DMA_TO_DEVICE);
+
+ return 0;
+}
+
static bool airoha_snand_supports_op(struct spi_slave *slave,
const struct spi_mem_op *op)
{
@@ -589,6 +1086,32 @@ static int airoha_snand_exec_op(struct spi_slave *slave,
priv = dev_get_priv(bus);
+ if (priv->dma) {
+ switch (op->cmd.opcode) {
+ case SPI_NAND_OP_READ_FROM_CACHE_SINGLE:
+ case SPI_NAND_OP_READ_FROM_CACHE_SINGLE_FAST:
+ case SPI_NAND_OP_READ_FROM_CACHE_DUAL:
+ case SPI_NAND_OP_READ_FROM_CACHE_DUALIO:
+ case SPI_NAND_OP_READ_FROM_CACHE_QUAD:
+ case SPI_NAND_OP_READ_FROM_CACHE_QUADIO:
+ err = airoha_snand_read_cache_dma(priv, op,
+ op->data.buf.in);
+ break;
+ case SPI_NAND_OP_PROGRAM_LOAD_SINGLE:
+ case SPI_NAND_OP_PROGRAM_LOAD_RAMDOM_SINGLE:
+ case SPI_NAND_OP_PROGRAM_LOAD_QUAD:
+ case SPI_NAND_OP_PROGRAM_LOAD_RAMDON_QUAD:
+ err = airoha_snand_write_cache_dma(priv, op,
+ op->data.buf.out);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ }
+
+ if (err != -EOPNOTSUPP)
+ return err;
+ }
+
op_len = op->cmd.nbytes;
addr_len = op->addr.nbytes;
dummy_len = op->dummy.nbytes;
@@ -679,6 +1202,9 @@ static int airoha_snand_probe(struct udevice *dev)
}
clk_enable(priv->spi_clk);
+ // force dma usage
+ priv->dma = 1;
+
return airoha_snand_nfi_init(priv);
}
--
2.47.2
More information about the U-Boot
mailing list