[PATCH V2] net: dwc_eth_qos: Pad descriptors to cacheline size

Marek Vasut marex at denx.de
Wed Jan 6 15:14:38 CET 2021


The DWMAC4 IP has the possibility to skip up to 7 AXI bus width size words
after the descriptor. Use this to pad the descriptors to cacheline size and
remove the need for noncached memory altogether. Moreover, this lets Tegra
use the generic cache flush / invalidate operations.

Signed-off-by: Marek Vasut <marex at denx.de>
Cc: Joe Hershberger <joe.hershberger at ni.com>
Cc: Patrice Chotard <patrice.chotard at st.com>
Cc: Patrick Delaunay <patrick.delaunay at st.com>
Cc: Ramon Fried <rfried.dev at gmail.com>
Cc: Stephen Warren <swarren at nvidia.com>
---
V2: Consider AXI bus width, disable noncached memory on STM32MP1 and Tegra
---
 drivers/net/dwc_eth_qos.c           | 129 +++++++++++-----------------
 include/configs/stm32mp1.h          |   1 -
 include/configs/tegra-common-post.h |   2 -
 3 files changed, 51 insertions(+), 81 deletions(-)

diff --git a/drivers/net/dwc_eth_qos.c b/drivers/net/dwc_eth_qos.c
index db1102562f6..e81be71f840 100644
--- a/drivers/net/dwc_eth_qos.c
+++ b/drivers/net/dwc_eth_qos.c
@@ -209,6 +209,7 @@ struct eqos_dma_regs {
 #define EQOS_DMA_SYSBUS_MODE_BLEN8			BIT(2)
 #define EQOS_DMA_SYSBUS_MODE_BLEN4			BIT(1)
 
+#define EQOS_DMA_CH0_CONTROL_DSL_SHIFT			18
 #define EQOS_DMA_CH0_CONTROL_PBLX8			BIT(16)
 
 #define EQOS_DMA_CH0_TX_CONTROL_TXPBL_SHIFT		16
@@ -239,37 +240,15 @@ struct eqos_tegra186_regs {
 #define EQOS_AUTO_CAL_STATUS_ACTIVE			BIT(31)
 
 /* Descriptors */
-
-#define EQOS_DESCRIPTOR_WORDS	4
-#define EQOS_DESCRIPTOR_SIZE	(EQOS_DESCRIPTOR_WORDS * 4)
 /* We assume ARCH_DMA_MINALIGN >= 16; 16 is the EQOS HW minimum */
 #define EQOS_DESCRIPTOR_ALIGN	ARCH_DMA_MINALIGN
 #define EQOS_DESCRIPTORS_TX	4
 #define EQOS_DESCRIPTORS_RX	4
 #define EQOS_DESCRIPTORS_NUM	(EQOS_DESCRIPTORS_TX + EQOS_DESCRIPTORS_RX)
-#define EQOS_DESCRIPTORS_SIZE	ALIGN(EQOS_DESCRIPTORS_NUM * \
-				      EQOS_DESCRIPTOR_SIZE, ARCH_DMA_MINALIGN)
 #define EQOS_BUFFER_ALIGN	ARCH_DMA_MINALIGN
 #define EQOS_MAX_PACKET_SIZE	ALIGN(1568, ARCH_DMA_MINALIGN)
 #define EQOS_RX_BUFFER_SIZE	(EQOS_DESCRIPTORS_RX * EQOS_MAX_PACKET_SIZE)
 
-/*
- * Warn if the cache-line size is larger than the descriptor size. In such
- * cases the driver will likely fail because the CPU needs to flush the cache
- * when requeuing RX buffers, therefore descriptors written by the hardware
- * may be discarded. Architectures with full IO coherence, such as x86, do not
- * experience this issue, and hence are excluded from this condition.
- *
- * This can be fixed by defining CONFIG_SYS_NONCACHED_MEMORY which will cause
- * the driver to allocate descriptors from a pool of non-cached memory.
- */
-#if EQOS_DESCRIPTOR_SIZE < ARCH_DMA_MINALIGN
-#if !defined(CONFIG_SYS_NONCACHED_MEMORY) && \
-	!CONFIG_IS_ENABLED(SYS_DCACHE_OFF) && !defined(CONFIG_X86)
-#warning Cache line size is larger than descriptor size
-#endif
-#endif
-
 struct eqos_desc {
 	u32 des0;
 	u32 des1;
@@ -282,12 +261,17 @@ struct eqos_desc {
 #define EQOS_DESC3_LD		BIT(28)
 #define EQOS_DESC3_BUF1V	BIT(24)
 
+#define EQOS_AXI_WIDTH_32	4
+#define EQOS_AXI_WIDTH_64	8
+#define EQOS_AXI_WIDTH_128	16
+
 struct eqos_config {
 	bool reg_access_always_ok;
 	int mdio_wait;
 	int swr_wait;
 	int config_mac;
 	int config_mac_mdio;
+	unsigned int axi_bus_width;
 	phy_interface_t (*interface)(struct udevice *dev);
 	struct eqos_ops *ops;
 };
@@ -330,9 +314,8 @@ struct eqos_priv {
 	int phyaddr;
 	u32 max_speed;
 	void *descs;
-	struct eqos_desc *tx_descs;
-	struct eqos_desc *rx_descs;
 	int tx_desc_idx, rx_desc_idx;
+	unsigned int desc_size;
 	void *tx_dma_buf;
 	void *rx_dma_buf;
 	void *rx_pkt;
@@ -358,63 +341,42 @@ struct eqos_priv {
  * not have the same constraints since they are 1536 bytes large, so they
  * are unlikely to share cache-lines.
  */
-static void *eqos_alloc_descs(unsigned int num)
+static void *eqos_alloc_descs(struct eqos_priv *eqos, unsigned int num)
 {
-#ifdef CONFIG_SYS_NONCACHED_MEMORY
-	return (void *)noncached_alloc(EQOS_DESCRIPTORS_SIZE,
-				      EQOS_DESCRIPTOR_ALIGN);
-#else
-	return memalign(EQOS_DESCRIPTOR_ALIGN, EQOS_DESCRIPTORS_SIZE);
-#endif
+	eqos->desc_size = max(sizeof(struct eqos_desc),
+			      (unsigned int)ARCH_DMA_MINALIGN);
+
+	return memalign(eqos->desc_size, num * eqos->desc_size);
 }
 
 static void eqos_free_descs(void *descs)
 {
-#ifdef CONFIG_SYS_NONCACHED_MEMORY
-	/* FIXME: noncached_alloc() has no opposite */
-#else
 	free(descs);
-#endif
 }
 
-static void eqos_inval_desc_tegra186(void *desc)
+static struct eqos_desc *eqos_get_desc(struct eqos_priv *eqos,
+				       unsigned int num, bool rx)
 {
-#ifndef CONFIG_SYS_NONCACHED_MEMORY
-	unsigned long start = (unsigned long)desc & ~(ARCH_DMA_MINALIGN - 1);
-	unsigned long end = ALIGN(start + EQOS_DESCRIPTOR_SIZE,
-				  ARCH_DMA_MINALIGN);
-
-	invalidate_dcache_range(start, end);
-#endif
+	return eqos->descs +
+		((rx ? EQOS_DESCRIPTORS_TX : 0) + num) * eqos->desc_size;
 }
 
 static void eqos_inval_desc_generic(void *desc)
 {
-#ifndef CONFIG_SYS_NONCACHED_MEMORY
-	unsigned long start = rounddown((unsigned long)desc, ARCH_DMA_MINALIGN);
-	unsigned long end = roundup((unsigned long)desc + EQOS_DESCRIPTOR_SIZE,
-				    ARCH_DMA_MINALIGN);
+	unsigned long start = (unsigned long)desc;
+	unsigned long end = ALIGN(start + sizeof(struct eqos_desc),
+				  ARCH_DMA_MINALIGN);
 
 	invalidate_dcache_range(start, end);
-#endif
-}
-
-static void eqos_flush_desc_tegra186(void *desc)
-{
-#ifndef CONFIG_SYS_NONCACHED_MEMORY
-	flush_cache((unsigned long)desc, EQOS_DESCRIPTOR_SIZE);
-#endif
 }
 
 static void eqos_flush_desc_generic(void *desc)
 {
-#ifndef CONFIG_SYS_NONCACHED_MEMORY
-	unsigned long start = rounddown((unsigned long)desc, ARCH_DMA_MINALIGN);
-	unsigned long end = roundup((unsigned long)desc + EQOS_DESCRIPTOR_SIZE,
-				    ARCH_DMA_MINALIGN);
+	unsigned long start = (unsigned long)desc;
+	unsigned long end = ALIGN(start + sizeof(struct eqos_desc),
+				  ARCH_DMA_MINALIGN);
 
 	flush_dcache_range(start, end);
-#endif
 }
 
 static void eqos_inval_buffer_tegra186(void *buf, size_t size)
@@ -1167,6 +1129,7 @@ static int eqos_start(struct udevice *dev)
 	ulong rate;
 	u32 val, tx_fifo_sz, rx_fifo_sz, tqs, rqs, pbl;
 	ulong last_rx_desc;
+	ulong desc_pad;
 
 	debug("%s(dev=%p):\n", __func__, dev);
 
@@ -1405,8 +1368,12 @@ static int eqos_start(struct udevice *dev)
 			EQOS_MAX_PACKET_SIZE <<
 			EQOS_DMA_CH0_RX_CONTROL_RBSZ_SHIFT);
 
+	desc_pad = (eqos->desc_size - sizeof(struct eqos_desc)) /
+		   eqos->config->axi_bus_width;
+
 	setbits_le32(&eqos->dma_regs->ch0_control,
-		     EQOS_DMA_CH0_CONTROL_PBLX8);
+		     EQOS_DMA_CH0_CONTROL_PBLX8 |
+		     (desc_pad << EQOS_DMA_CH0_CONTROL_DSL_SHIFT));
 
 	/*
 	 * Burst length must be < 1/2 FIFO size.
@@ -1435,9 +1402,15 @@ static int eqos_start(struct udevice *dev)
 
 	/* Set up descriptors */
 
-	memset(eqos->descs, 0, EQOS_DESCRIPTORS_SIZE);
+	memset(eqos->descs, 0, eqos->desc_size * EQOS_DESCRIPTORS_NUM);
+
+	for (i = 0; i < EQOS_DESCRIPTORS_TX; i++) {
+		struct eqos_desc *tx_desc = eqos_get_desc(eqos, i, false);
+		eqos->config->ops->eqos_flush_desc(tx_desc);
+	}
+
 	for (i = 0; i < EQOS_DESCRIPTORS_RX; i++) {
-		struct eqos_desc *rx_desc = &(eqos->rx_descs[i]);
+		struct eqos_desc *rx_desc = eqos_get_desc(eqos, i, true);
 		rx_desc->des0 = (u32)(ulong)(eqos->rx_dma_buf +
 					     (i * EQOS_MAX_PACKET_SIZE));
 		rx_desc->des3 = EQOS_DESC3_OWN | EQOS_DESC3_BUF1V;
@@ -1449,12 +1422,14 @@ static int eqos_start(struct udevice *dev)
 	}
 
 	writel(0, &eqos->dma_regs->ch0_txdesc_list_haddress);
-	writel((ulong)eqos->tx_descs, &eqos->dma_regs->ch0_txdesc_list_address);
+	writel((ulong)eqos_get_desc(eqos, 0, false),
+		&eqos->dma_regs->ch0_txdesc_list_address);
 	writel(EQOS_DESCRIPTORS_TX - 1,
 	       &eqos->dma_regs->ch0_txdesc_ring_length);
 
 	writel(0, &eqos->dma_regs->ch0_rxdesc_list_haddress);
-	writel((ulong)eqos->rx_descs, &eqos->dma_regs->ch0_rxdesc_list_address);
+	writel((ulong)eqos_get_desc(eqos, 0, true),
+		&eqos->dma_regs->ch0_rxdesc_list_address);
 	writel(EQOS_DESCRIPTORS_RX - 1,
 	       &eqos->dma_regs->ch0_rxdesc_ring_length);
 
@@ -1473,7 +1448,7 @@ static int eqos_start(struct udevice *dev)
 	 * that's not distinguishable from none of the descriptors being
 	 * available.
 	 */
-	last_rx_desc = (ulong)&(eqos->rx_descs[(EQOS_DESCRIPTORS_RX - 1)]);
+	last_rx_desc = (ulong)eqos_get_desc(eqos, EQOS_DESCRIPTORS_RX - 1, true);
 	writel(last_rx_desc, &eqos->dma_regs->ch0_rxdesc_tail_pointer);
 
 	eqos->started = true;
@@ -1558,7 +1533,7 @@ static int eqos_send(struct udevice *dev, void *packet, int length)
 	memcpy(eqos->tx_dma_buf, packet, length);
 	eqos->config->ops->eqos_flush_buffer(eqos->tx_dma_buf, length);
 
-	tx_desc = &(eqos->tx_descs[eqos->tx_desc_idx]);
+	tx_desc = eqos_get_desc(eqos, eqos->tx_desc_idx, false);
 	eqos->tx_desc_idx++;
 	eqos->tx_desc_idx %= EQOS_DESCRIPTORS_TX;
 
@@ -1573,7 +1548,7 @@ static int eqos_send(struct udevice *dev, void *packet, int length)
 	tx_desc->des3 = EQOS_DESC3_OWN | EQOS_DESC3_FD | EQOS_DESC3_LD | length;
 	eqos->config->ops->eqos_flush_desc(tx_desc);
 
-	writel((ulong)(&(eqos->tx_descs[eqos->tx_desc_idx])),
+	writel((ulong)eqos_get_desc(eqos, eqos->tx_desc_idx, false),
 		&eqos->dma_regs->ch0_txdesc_tail_pointer);
 
 	for (i = 0; i < 1000000; i++) {
@@ -1596,7 +1571,7 @@ static int eqos_recv(struct udevice *dev, int flags, uchar **packetp)
 
 	debug("%s(dev=%p, flags=%x):\n", __func__, dev, flags);
 
-	rx_desc = &(eqos->rx_descs[eqos->rx_desc_idx]);
+	rx_desc = eqos_get_desc(eqos, eqos->rx_desc_idx, true);
 	eqos->config->ops->eqos_inval_desc(rx_desc);
 	if (rx_desc->des3 & EQOS_DESC3_OWN) {
 		debug("%s: RX packet not available\n", __func__);
@@ -1631,7 +1606,7 @@ static int eqos_free_pkt(struct udevice *dev, uchar *packet, int length)
 
 	eqos->config->ops->eqos_inval_buffer(packet, length);
 
-	rx_desc = &(eqos->rx_descs[eqos->rx_desc_idx]);
+	rx_desc = eqos_get_desc(eqos, eqos->rx_desc_idx, true);
 
 	rx_desc->des0 = 0;
 	mb();
@@ -1663,17 +1638,12 @@ static int eqos_probe_resources_core(struct udevice *dev)
 
 	debug("%s(dev=%p):\n", __func__, dev);
 
-	eqos->descs = eqos_alloc_descs(EQOS_DESCRIPTORS_TX +
-				       EQOS_DESCRIPTORS_RX);
+	eqos->descs = eqos_alloc_descs(eqos, EQOS_DESCRIPTORS_NUM);
 	if (!eqos->descs) {
 		debug("%s: eqos_alloc_descs() failed\n", __func__);
 		ret = -ENOMEM;
 		goto err;
 	}
-	eqos->tx_descs = (struct eqos_desc *)eqos->descs;
-	eqos->rx_descs = (eqos->tx_descs + EQOS_DESCRIPTORS_TX);
-	debug("%s: tx_descs=%p, rx_descs=%p\n", __func__, eqos->tx_descs,
-	      eqos->rx_descs);
 
 	eqos->tx_dma_buf = memalign(EQOS_BUFFER_ALIGN, EQOS_MAX_PACKET_SIZE);
 	if (!eqos->tx_dma_buf) {
@@ -2083,8 +2053,8 @@ static const struct eth_ops eqos_ops = {
 };
 
 static struct eqos_ops eqos_tegra186_ops = {
-	.eqos_inval_desc = eqos_inval_desc_tegra186,
-	.eqos_flush_desc = eqos_flush_desc_tegra186,
+	.eqos_inval_desc = eqos_inval_desc_generic,
+	.eqos_flush_desc = eqos_flush_desc_generic,
 	.eqos_inval_buffer = eqos_inval_buffer_tegra186,
 	.eqos_flush_buffer = eqos_flush_buffer_tegra186,
 	.eqos_probe_resources = eqos_probe_resources_tegra186,
@@ -2105,6 +2075,7 @@ static const struct eqos_config __maybe_unused eqos_tegra186_config = {
 	.swr_wait = 10,
 	.config_mac = EQOS_MAC_RXQ_CTRL0_RXQ0EN_ENABLED_DCB,
 	.config_mac_mdio = EQOS_MAC_MDIO_ADDRESS_CR_20_35,
+	.axi_bus_width = EQOS_AXI_WIDTH_128,
 	.interface = eqos_get_interface_tegra186,
 	.ops = &eqos_tegra186_ops
 };
@@ -2132,6 +2103,7 @@ static const struct eqos_config __maybe_unused eqos_stm32_config = {
 	.swr_wait = 50,
 	.config_mac = EQOS_MAC_RXQ_CTRL0_RXQ0EN_ENABLED_AV,
 	.config_mac_mdio = EQOS_MAC_MDIO_ADDRESS_CR_250_300,
+	.axi_bus_width = EQOS_AXI_WIDTH_64,
 	.interface = eqos_get_interface_stm32,
 	.ops = &eqos_stm32_ops
 };
@@ -2159,6 +2131,7 @@ struct eqos_config __maybe_unused eqos_imx_config = {
 	.swr_wait = 50,
 	.config_mac = EQOS_MAC_RXQ_CTRL0_RXQ0EN_ENABLED_DCB,
 	.config_mac_mdio = EQOS_MAC_MDIO_ADDRESS_CR_250_300,
+	.axi_bus_width = EQOS_AXI_WIDTH_64,
 	.interface = eqos_get_interface_imx,
 	.ops = &eqos_imx_ops
 };
diff --git a/include/configs/stm32mp1.h b/include/configs/stm32mp1.h
index 1f6cb2919b5..43b6d7d8afc 100644
--- a/include/configs/stm32mp1.h
+++ b/include/configs/stm32mp1.h
@@ -69,7 +69,6 @@
 
 /* Ethernet need */
 #ifdef CONFIG_DWC_ETH_QOS
-#define CONFIG_SYS_NONCACHED_MEMORY	(1 * SZ_1M)	/* 1M */
 #define CONFIG_SERVERIP                 192.168.1.1
 #define CONFIG_BOOTP_SERVERIP
 #define CONFIG_SYS_AUTOLOAD		"no"
diff --git a/include/configs/tegra-common-post.h b/include/configs/tegra-common-post.h
index fae0e761fb4..54481083e1d 100644
--- a/include/configs/tegra-common-post.h
+++ b/include/configs/tegra-common-post.h
@@ -18,8 +18,6 @@
 #define CONFIG_SYS_MALLOC_LEN		(4 << 20)	/* 4MB  */
 #endif
 
-#define CONFIG_SYS_NONCACHED_MEMORY	(1 << 20)	/* 1 MiB */
-
 #ifndef CONFIG_SPL_BUILD
 #ifndef BOOT_TARGET_DEVICES
 #define BOOT_TARGET_DEVICES(func) \
-- 
2.29.2



More information about the U-Boot mailing list