[RFC PATCH 2/2] net: Add Support for GENET Ethernet controller

Andre Przywara andre.przywara at arm.com
Fri Dec 13 17:50:05 CET 2019


On Fri, 13 Dec 2019 21:12:22 +0530
Amit Singh Tomar <amittomer25 at gmail.com> wrote:

Hi Amit,

thanks for your work on this.
As I have seen versions of this code before, here just some quick things (mostly stylistic) that I spotted earlier.

> This patch adds driver for GENET Ethernet controller.
> 
> It has been tested(ping and tftp works for small files) on
> Raspberry Pi 4.

I am afraid that needs to be more elaborate. You could mention Broadcom, also that it's only for v5 of it. And that it's based on reverse engineering the Linux driver.
Text in the cover letter will not make it into the repo, so you should mention this here again.

> 
> Signed-off-by: Amit Singh Tomar <amittomer25 at gmail.com>
> ---
>  configs/rpi_4_defconfig |   2 +
>  drivers/net/Kconfig     |   7 +
>  drivers/net/Makefile    |   1 +
>  drivers/net/bcmgenet.c  | 770 ++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 780 insertions(+)
>  create mode 100644 drivers/net/bcmgenet.c
> 
> diff --git a/configs/rpi_4_defconfig b/configs/rpi_4_defconfig
> index 8cf1bb8..b0f9cf1 100644
> --- a/configs/rpi_4_defconfig
> +++ b/configs/rpi_4_defconfig
> @@ -24,6 +24,8 @@ CONFIG_DM_KEYBOARD=y
>  CONFIG_DM_MMC=y
>  CONFIG_MMC_SDHCI=y
>  CONFIG_MMC_SDHCI_BCM2835=y
> +CONFIG_DM_ETH=y

I am not sure that belongs into the individual defconfig file, I guess it should be in some Kconfig instead?

> +CONFIG_BCMGENET=y
>  CONFIG_PINCTRL=y
>  # CONFIG_PINCTRL_GENERIC is not set
>  # CONFIG_REQUIRE_SERIAL_CONSOLE is not set
> diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
> index 142a2c6..999714d 100644
> --- a/drivers/net/Kconfig
> +++ b/drivers/net/Kconfig
> @@ -136,6 +136,13 @@ config BCM6368_ETH
>  	help
>  	  This driver supports the BCM6368 Ethernet MAC.
>  
> +config BCMGENET
> +	bool "BCMGENET V5 support"
> +	depends on DM_ETH
> +	select PHYLIB
> +	help
> +	  This driver supports the BCMGENET Ethernet MAC.

Again, please mention Broadcom, also the RPi4 here.

> +
>  config DWC_ETH_QOS
>  	bool "Synopsys DWC Ethernet QOS device support"
>  	depends on DM_ETH
> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> index 3099183..6e0a688 100644
> --- a/drivers/net/Makefile
> +++ b/drivers/net/Makefile
> @@ -8,6 +8,7 @@ obj-$(CONFIG_AG7XXX) += ag7xxx.o
>  obj-$(CONFIG_ARMADA100_FEC) += armada100_fec.o
>  obj-$(CONFIG_BCM6348_ETH) += bcm6348-eth.o
>  obj-$(CONFIG_BCM6368_ETH) += bcm6368-eth.o
> +obj-$(CONFIG_BCMGENET) += bcmgenet.o
>  obj-$(CONFIG_DRIVER_AT91EMAC) += at91_emac.o
>  obj-$(CONFIG_DRIVER_AX88180) += ax88180.o
>  obj-$(CONFIG_BCM_SF2_ETH) += bcm-sf2-eth.o
> diff --git a/drivers/net/bcmgenet.c b/drivers/net/bcmgenet.c
> new file mode 100644
> index 0000000..bad5ffb
> --- /dev/null
> +++ b/drivers/net/bcmgenet.c
> @@ -0,0 +1,770 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +/*
> + * Ethernet driver for GENET controller found on RPI4.

Broadcom GENETv5.

And you could tell a bit about the history here, that it's based on the Linux driver, but limited to support v5 and do away with all the priority queues, etc.

> + *
> + */
> +
> +#include <asm/io.h>
> +#include <clk.h>
> +#include <cpu_func.h>
> +#include <dm.h>
> +#include <fdt_support.h>
> +#include <linux/err.h>
> +#include <malloc.h>
> +#include <miiphy.h>
> +#include <net.h>
> +#include <dm/of_access.h>
> +#include <dm/ofnode.h>
> +#include <linux/libfdt.h>
> +#include <linux/iopoll.h>
> +#include <asm/dma-mapping.h>
> +
> +/* Register definitions derived from Linux source */
> +#define SYS_REV_CTRL			 0x00
> +
> +#define SYS_PORT_CTRL			 0x04
> +#define PORT_MODE_EXT_GPHY		 3
> +
> +#define GENET_SYS_OFF			 0x0000
> +#define SYS_RBUF_FLUSH_CTRL		 (GENET_SYS_OFF  + 0x08)
> +#define SYS_TBUF_FLUSH_CTRL		 (GENET_SYS_OFF  + 0x0C)
> +
> +#define GENET_EXT_OFF			 0x0080
> +#define EXT_RGMII_OOB_CTRL               (GENET_EXT_OFF + 0x0C)
> +#define RGMII_MODE_EN_V123               BIT(0)
> +#define RGMII_LINK                       BIT(4)
> +#define OOB_DISABLE                      BIT(5)
> +#define RGMII_MODE_EN                    BIT(6)
> +#define ID_MODE_DIS                      BIT(16)
> +
> +#define GENET_RBUF_OFF			 0x0300
> +#define RBUF_FLUSH_CTRL_V1		 (GENET_RBUF_OFF + 0x04)
> +#define RBUF_TBUF_SIZE_CTRL              (GENET_RBUF_OFF + 0xb4)
> +#define RBUF_CTRL                        (GENET_RBUF_OFF + 0x00)
> +#define RBUF_64B_EN                      BIT(0)
> +#define RBUF_ALIGN_2B                    BIT(1)
> +#define RBUF_BAD_DIS                     BIT(2)

Thanks for aligning this - I guess it would be in an editor ;-)
But please try to use tabs consistently for indenting this.

> +
> +#define GENET_UMAC_OFF			 0x0800
> +#define UMAC_MIB_CTRL			 (GENET_UMAC_OFF + 0x580)
> +#define UMAC_MAX_FRAME_LEN		 (GENET_UMAC_OFF + 0x014)
> +#define UMAC_MAC0			 (GENET_UMAC_OFF + 0x00C)
> +#define UMAC_MAC1			 (GENET_UMAC_OFF + 0x010)
> +#define UMAC_CMD			 (GENET_UMAC_OFF + 0x008)
> +#define MDIO_CMD			 (GENET_UMAC_OFF + 0x614)
> +#define UMAC_TX_FLUSH			 (GENET_UMAC_OFF + 0x334)
> +#define MDIO_START_BUSY                  BIT(29)
> +#define MDIO_READ_FAIL                   BIT(28)
> +#define MDIO_RD                          (2 << 26)
> +#define MDIO_WR                          BIT(26)
> +#define MDIO_PMD_SHIFT                   21
> +#define MDIO_PMD_MASK                    0x1F
> +#define MDIO_REG_SHIFT                   16
> +#define MDIO_REG_MASK                    0x1F
> +
> +#define CMD_TX_EN			 BIT(0)
> +#define CMD_RX_EN			 BIT(1)
> +#define UMAC_SPEED_10			 0
> +#define UMAC_SPEED_100			 1
> +#define UMAC_SPEED_1000			 2
> +#define UMAC_SPEED_2500			 3
> +#define CMD_SPEED_SHIFT			 2
> +#define CMD_SPEED_MASK			 3
> +#define CMD_SW_RESET			 BIT(13)
> +#define CMD_LCL_LOOP_EN			 BIT(15)
> +#define CMD_TX_EN			 BIT(0)
> +#define CMD_RX_EN			 BIT(1)
> +
> +#define MIB_RESET_RX			 BIT(0)
> +#define MIB_RESET_RUNT			 BIT(1)
> +#define MIB_RESET_TX			 BIT(2)
> +
> +/* total number of Buffer Descriptors, same for Rx/Tx */
> +#define TOTAL_DESC			 256
> +
> +#define DEFAULT_Q                        0x10

I think that deserves a comment.
Maybe Florian can comment on what this odd "queue 16" is really about? It seems weird to support *17* queues and make the last one the default.

> +
> +/* Body(1500) + EH_SIZE(14) + VLANTAG(4) + BRCMTAG(6) + FCS(4) = 1528.
> + * 1536 is multiple of 256 bytes
> + */
> +#define ENET_BRCM_TAG_LEN		 6
> +#define ENET_PAD			 8
> +#define ENET_MAX_MTU_SIZE		 (ETH_DATA_LEN + ETH_HLEN + VLAN_HLEN + \
> +					  ENET_BRCM_TAG_LEN + ETH_FCS_LEN + ENET_PAD)
> +
> +/* Tx/Rx Dma Descriptor common bits*/
> +#define DMA_EN                           BIT(0)
> +#define DMA_RING_BUF_EN_SHIFT            0x01
> +#define DMA_RING_BUF_EN_MASK             0xFFFF
> +#define DMA_BUFLENGTH_MASK		 0x0fff
> +#define DMA_BUFLENGTH_SHIFT		 16
> +#define DMA_RING_SIZE_SHIFT		 16
> +#define DMA_OWN				 0x8000
> +#define DMA_EOP				 0x4000
> +#define DMA_SOP				 0x2000
> +#define DMA_WRAP			 0x1000
> +#define DMA_MAX_BURST_LENGTH             0x8

I think it deserves mentioning that this is the RPi4 specific value, all the other GENET hardware can use 0x10 here. It looks to me like 0x8 is probably safe for all to use, it would just be less efficient. But I guess we don't care in U-Boot.

> +/* Tx specific Dma descriptor bits */
> +#define DMA_TX_UNDERRUN			 0x0200
> +#define DMA_TX_APPEND_CRC		 0x0040
> +#define DMA_TX_OW_CRC			 0x0020
> +#define DMA_TX_DO_CSUM			 0x0010
> +#define DMA_TX_QTAG_SHIFT		 7
> +#define GENET_TDMA_REG_OFF		 (0x4000 + \
> +					  TOTAL_DESC * DMA_DESC_SIZE)
> +#define GENET_RDMA_REG_OFF		 (0x2000 + \
> +					  TOTAL_DESC * DMA_DESC_SIZE)
> +
> +/* DMA rings size */
> +#define DMA_RING_SIZE			 (0x40)
> +#define DMA_RINGS_SIZE			 (DMA_RING_SIZE * (DEFAULT_Q + 1))
> +
> +/* DMA Descriptor */
> +#define DMA_DESC_LENGTH_STATUS		 0x00
> +#define DMA_DESC_ADDRESS_LO		 0x04
> +#define DMA_DESC_ADDRESS_HI		 0x08
> +#define DMA_DESC_SIZE                    0xc
> +
> +#define DMA_FC_THRESH_HI		 (TOTAL_DESC >> 4)
> +#define DMA_FC_THRESH_LO		 5
> +#define DMA_XOFF_THRESHOLD_SHIFT	 16
> +
> +#define TDMA_RING_REG_BASE(QUEUE_NUMBER) (GENET_TDMA_REG_OFF \
> +			   + (DMA_RING_SIZE * (QUEUE_NUMBER)))

So it seems like we are *always* using the default queue number. For improved readability we could drop this parameter and hardcode this here, so that users just need to use TDMA_RING_REG_BASE.

> +#define TDMA_READ_PTR                    0x00
> +#define TDMA_CONS_INDEX                  0x08
> +#define TDMA_PROD_INDEX                  0x0C
> +#define DMA_RING_BUF_SIZE                0x10
> +#define DMA_START_ADDR                   0x14
> +#define DMA_END_ADDR                     0x1C
> +#define DMA_MBUF_DONE_THRESH             0x24
> +#define TDMA_FLOW_PERIOD                 0x28
> +#define TDMA_WRITE_PTR                   0x2C
> +
> +#define RDMA_RING_REG_BASE(QUEUE_NUMBER) (GENET_RDMA_REG_OFF \
> +			   + (DMA_RING_SIZE * (QUEUE_NUMBER)))
> +#define RDMA_WRITE_PTR                   TDMA_READ_PTR
> +#define RDMA_READ_PTR                    TDMA_WRITE_PTR
> +#define RDMA_PROD_INDEX                  TDMA_CONS_INDEX
> +#define RDMA_CONS_INDEX                  TDMA_PROD_INDEX
> +#define RDMA_XON_XOFF_THRESH             TDMA_FLOW_PERIOD
> +
> +#define TDMA_REG_BASE			 (GENET_TDMA_REG_OFF + DMA_RINGS_SIZE)
> +#define RDMA_REG_BASE			 (GENET_RDMA_REG_OFF + DMA_RINGS_SIZE)
> +#define DMA_RING_CFG			 0x0
> +#define DMA_CTRL			 0x04
> +#define DMA_SCB_BURST_SIZE		 0x0C
> +
> +#define RX_BUF_LENGTH			 2048
> +#define RX_TOTAL_BUFSIZE		 (RX_BUF_LENGTH * TOTAL_DESC)
> +#define RX_BUF_OFFSET			 2
> +
> +DECLARE_GLOBAL_DATA_PTR;
> +
> +struct bcmgenet_eth_priv {
> +	void *mac_reg;
> +	char rxbuffer[RX_TOTAL_BUFSIZE] __aligned(ARCH_DMA_MINALIGN);

Making this the first member would probably help with the alignment.

> +	void *tx_desc_base;
> +	void *rx_desc_base;
> +	int tx_index;
> +	int rx_index;
> +	int c_index;
> +	int phyaddr;
> +	u32 interface;
> +	u32 speed;
> +	struct phy_device *phydev;
> +	struct mii_dev *bus;
> +};
> +
> +static void bcmgenet_umac_reset(struct bcmgenet_eth_priv *priv)
> +{
> +	u32 reg;
> +
> +	reg = readl(priv->mac_reg + SYS_RBUF_FLUSH_CTRL);
> +	reg |= BIT(1);
> +	writel(reg, (priv->mac_reg + SYS_RBUF_FLUSH_CTRL));

Please use setbits_le32() and friends for those manipulations. This is much easier to read and less error-prone. This applies to all other bit manipulations below as well.

> +	udelay(10);
> +
> +	reg &= ~BIT(1);
> +	writel(reg, (priv->mac_reg + SYS_RBUF_FLUSH_CTRL));
> +	udelay(10);
> +}
> +
> +static void reset_umac(struct bcmgenet_eth_priv *priv)

What is the difference to the function above? The name is confusingly similar.

> +{
> +	writel(0, (priv->mac_reg + SYS_RBUF_FLUSH_CTRL));
> +	udelay(10);
> +
> +	writel(0, priv->mac_reg + UMAC_CMD);
> +
> +	writel(CMD_SW_RESET | CMD_LCL_LOOP_EN, priv->mac_reg + UMAC_CMD);
> +	udelay(2);
> +	writel(0, priv->mac_reg + UMAC_CMD);
> +}
> +
> +static void init_umac(struct bcmgenet_eth_priv *priv)
> +{
> +	u32 reg;
> +
> +	reset_umac(priv);
> +
> +	/* clear tx/rx counter */
> +	writel(MIB_RESET_RX | MIB_RESET_TX | MIB_RESET_RUNT,
> +	       priv->mac_reg + UMAC_MIB_CTRL);
> +	writel(0, priv->mac_reg + UMAC_MIB_CTRL);
> +
> +	writel(ENET_MAX_MTU_SIZE, priv->mac_reg + UMAC_MAX_FRAME_LEN);
> +
> +	/* init rx registers, enable ip header optimization */
> +	reg = readl(priv->mac_reg + RBUF_CTRL);
> +	reg |= RBUF_ALIGN_2B;
> +	writel(reg, (priv->mac_reg + RBUF_CTRL));
> +
> +	writel(1, (priv->mac_reg + RBUF_TBUF_SIZE_CTRL));
> +}
> +
> +static int _bcmgenet_write_hwaddr(struct bcmgenet_eth_priv *priv,
> +				  unsigned char *addr)
> +{
> +	writel_relaxed(((addr[0] << 24) | (addr[1] << 16) | (addr[2] << 8)
> +		       | addr[3]), priv->mac_reg + UMAC_MAC0);
> +	writel_relaxed((addr[4] << 8) | addr[5], priv->mac_reg + UMAC_MAC1);
> +
> +	return 0;
> +}

Why this function? Can't you just use the one below and put the actual code in there?

> +
> +static int bcmgenet_gmac_write_hwaddr(struct udevice *dev)
> +{
> +	struct eth_pdata *pdata = dev_get_platdata(dev);
> +	struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> +
> +	return _bcmgenet_write_hwaddr(priv, pdata->enetaddr);
> +}
> +
> +static u32 bcmgenet_dma_disable(struct bcmgenet_eth_priv *priv)
> +{
> +	u32 reg;
> +	u32 dma_ctrl;
> +
> +	dma_ctrl = 1 << (DEFAULT_Q + DMA_RING_BUF_EN_SHIFT) | DMA_EN;
> +	reg = readl(priv->mac_reg + TDMA_REG_BASE + DMA_CTRL);
> +	reg &= ~dma_ctrl;
> +	writel(reg, (priv->mac_reg + TDMA_REG_BASE + DMA_CTRL));
> +
> +	reg = readl(priv->mac_reg + RDMA_REG_BASE + DMA_CTRL);
> +	reg &= ~dma_ctrl;
> +	writel(reg, (priv->mac_reg + RDMA_REG_BASE + DMA_CTRL));
> +
> +	writel(1, priv->mac_reg + UMAC_TX_FLUSH);
> +	udelay(10);
> +	writel(0, priv->mac_reg + UMAC_TX_FLUSH);
> +
> +	return dma_ctrl;
> +}
> +
> +static void bcmgenet_enable_dma(struct bcmgenet_eth_priv *priv, u32 dma_ctrl)
> +{
> +	u32 reg;
> +
> +	dma_ctrl |= (1 << (DEFAULT_Q + DMA_RING_BUF_EN_SHIFT));
> +	dma_ctrl |= DMA_EN;
> +
> +	writel(dma_ctrl, priv->mac_reg + TDMA_REG_BASE + DMA_CTRL);
> +
> +	reg = readl(priv->mac_reg + RDMA_REG_BASE + DMA_CTRL);
> +	reg |= dma_ctrl;
> +	writel(reg, priv->mac_reg + RDMA_REG_BASE + DMA_CTRL);
> +}
> +
> +static int _bcmgenet_gmac_eth_send(struct bcmgenet_eth_priv *priv, void *packet,
> +				   int len)
> +{
> +	u32 size, len_stat, prod_index, cons;
> +	u32 tries = 100;
> +
> +	void *desc_base = (priv->tx_desc_base +	(priv->tx_index * DMA_DESC_SIZE));
> +
> +	len_stat = (len << DMA_BUFLENGTH_SHIFT) | (0x3F << DMA_TX_QTAG_SHIFT);
> +	len_stat |= (DMA_TX_APPEND_CRC | DMA_SOP | DMA_EOP);
> +
> +	prod_index = readl(priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_PROD_INDEX);
> +
> +	size = roundup(len, ARCH_DMA_MINALIGN);

I think more important than aligning the size is aligning the base.
Not that it really matters architecturally ...

> +	flush_dcache_range((ulong)packet, (ulong)packet + size);
> +
> +	/* Set-up packet for transmission */
> +	writel(lower_32_bits((ulong)packet), (desc_base + DMA_DESC_ADDRESS_LO));
> +	writel(upper_32_bits((ulong)packet), (desc_base + DMA_DESC_ADDRESS_HI));
> +	writel(len_stat, (desc_base + DMA_DESC_LENGTH_STATUS));
> +
> +	/* Increment index and wrap-up */
> +	priv->tx_index++;
> +	if (!(priv->tx_index % TOTAL_DESC)) {
> +		priv->tx_index = 0;

This looks weird to read. I guess you just want to wrap around?
Maybe:
	if (++priv->tx_index >= TOTAL_DESC)
		....

> +	}
> +
> +	prod_index++;
> +
> +	/* Start Transmisson */
> +	writel(prod_index, (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_PROD_INDEX));
> +
> +	do {
> +		cons = readl(priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_CONS_INDEX);
> +	} while ((cons & 0xffff) < prod_index && --tries);
> +	if (!tries)
> +		return -ETIMEDOUT;
> +
> +	return 0;
> +}
> +
> +static int _bcmgenet_gmac_eth_recv(struct bcmgenet_eth_priv *priv, uchar **packetp)
> +{
> +	u32 len_stat;
> +	u32 len;
> +	u32 addr;
> +	u32 length = 0;
> +	void *desc_base = (priv->rx_desc_base + (priv->rx_index * DMA_DESC_SIZE));
> +
> +	len_stat = readl(desc_base + DMA_DESC_LENGTH_STATUS);
> +
> +	if (!(len_stat & DMA_OWN)) {

I think it would be cleaner to negate this and just bail out early.
But also: Is this bit safe to use? Does the MAC update this?

> +		len  = readl(desc_base + DMA_DESC_LENGTH_STATUS);
> +		addr = readl(desc_base + DMA_DESC_ADDRESS_LO);
> +
> +		length = (len >> DMA_BUFLENGTH_SHIFT) & DMA_BUFLENGTH_MASK;
> +
> +		invalidate_dcache_range((uintptr_t)addr,
> +					(addr + RX_TOTAL_BUFSIZE));

There is no need to invalidate *all* buffers, so just use RX_BUF_LENGTH.
Actually it looks like we might need lose data (our indexes!), since you invalidate cache lines beyond the buffers.

> +
> +		/*
> +		 * two dummy bytes are added for IP alignment, this can be
> +		 * avoided by not programming RBUF_ALIGN_2B bit in RBUF_CTRL
> +		 */
> +		*packetp = (uchar *)(ulong)addr + RX_BUF_OFFSET;
> +
> +		return (length - RX_BUF_OFFSET);
> +	}
> +
> +	return -EAGAIN;
> +}
> +
> +static int _bcmgenet_free_pkt(struct bcmgenet_eth_priv *priv, int len)
> +{
> +	priv->c_index = (priv->c_index + 1) & 0xFFFF;
> +	writel(priv->c_index,
> +	       priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + RDMA_CONS_INDEX);
> +	udelay(1000);

What is this udelay about?

> +
> +	priv->rx_index++;
> +	if (!(priv->rx_index % TOTAL_DESC)) {
> +		priv->rx_index = 0;
> +	}

Same comment for the wrap-around as above.

> +
> +	return 0;
> +}
> +
> +static void rx_descs_init(struct bcmgenet_eth_priv *priv)
> +{
> +	char *rxbuffs = &priv->rxbuffer[0];
> +	u32 len_stat, i;
> +	void *desc_base = priv->rx_desc_base;
> +
> +	priv->c_index = 0;
> +
> +	len_stat = ((RX_BUF_LENGTH << DMA_BUFLENGTH_SHIFT) | DMA_OWN);

Is setting DMA_OWN necessary? I had the impression this bit was read-only?

> +
> +	for (i = 0; i < TOTAL_DESC; i++) {
> +		writel(lower_32_bits((uintptr_t)&rxbuffs[i * RX_BUF_LENGTH]),
> +		       (((desc_base + (i * DMA_DESC_SIZE)) + DMA_DESC_ADDRESS_LO)));
> +		writel(upper_32_bits((uintptr_t)&rxbuffs[i * RX_BUF_LENGTH]),
> +		       (((desc_base + (i * DMA_DESC_SIZE)) + DMA_DESC_ADDRESS_HI)));
> +		writel(len_stat,
> +		       ((desc_base + (i * DMA_DESC_SIZE) + DMA_DESC_LENGTH_STATUS)));
> +	}
> +}
> +
> +static void rx_ring_init(struct bcmgenet_eth_priv *priv)
> +{
> +	writel(DMA_MAX_BURST_LENGTH,
> +	       (priv->mac_reg + RDMA_REG_BASE + DMA_SCB_BURST_SIZE));
> +	writel(0x0,
> +	       (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + DMA_START_ADDR));
> +	writel(0x0,
> +	       (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + RDMA_READ_PTR));
> +	writel(0x0,
> +	       (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + RDMA_WRITE_PTR));
> +	writel((TOTAL_DESC * (DMA_DESC_SIZE - 1)),

If I get the Linux code correctly, this is supposed to point at the last *byte* of the descriptors.

> +	       (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + DMA_END_ADDR));
> +	writel(0x0,
> +	       (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + RDMA_PROD_INDEX));
> +	writel(0x0,
> +	       (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + RDMA_CONS_INDEX));
> +	writel(((TOTAL_DESC << DMA_RING_SIZE_SHIFT) | RX_BUF_LENGTH),
> +	       (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + DMA_RING_BUF_SIZE));
> +	writel(((DMA_FC_THRESH_LO << DMA_XOFF_THRESHOLD_SHIFT) | DMA_FC_THRESH_HI),
> +	       (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + RDMA_XON_XOFF_THRESH));
> +	writel((1 << DEFAULT_Q),
> +	       (priv->mac_reg + RDMA_REG_BASE + DMA_RING_CFG));
> +}
> +
> +static void tx_ring_init(struct bcmgenet_eth_priv *priv)
> +{
> +	writel(DMA_MAX_BURST_LENGTH,
> +	       (priv->mac_reg + TDMA_REG_BASE + DMA_SCB_BURST_SIZE));

I think would be better to include both offsets into one value, since you always seem to use them together. You can keep the base in the definition:
#define DMA_SCB_BURST_SIZE	(TDMA_REG_BASE + 0x0c)

> +	writel(0x0,
> +	       (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + DMA_START_ADDR));

As mentioned above, the DEFAULT_Q parameter is redundant in our case (this deserves to be mentioned somewhere, btw.).
So at the very least drop this parameter, but also consider to include the mandatory offset into the definition of DMA_START_ADDR, as above.

> +	writel(0x0,
> +	       (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_READ_PTR));
> +	writel(0x0,
> +	       (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_WRITE_PTR));
> +	writel((TOTAL_DESC * (DMA_DESC_SIZE - 1)),

Same as above, I don't think this is right.

> +	       (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + DMA_END_ADDR));
> +	writel(0x0,
> +	       (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_PROD_INDEX));
> +	writel(0x0,
> +	       (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_CONS_INDEX));
> +	writel(0x1,
> +	       (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + DMA_MBUF_DONE_THRESH));
> +	writel(0x0,
> +	       (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_FLOW_PERIOD));
> +	writel(((TOTAL_DESC << DMA_RING_SIZE_SHIFT) | RX_BUF_LENGTH),
> +	       (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + DMA_RING_BUF_SIZE));
> +	writel((1 << DEFAULT_Q),
> +	       (priv->mac_reg + TDMA_REG_BASE + DMA_RING_CFG));
> +}
> +
> +static int bcmgenet_gmac_eth_send(struct udevice *dev, void *packet, int length)
> +{
> +	struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> +
> +	return _bcmgenet_gmac_eth_send(priv, packet, length);
> +}
> +
> +static int bcmgenet_gmac_eth_recv(struct udevice *dev, int flags,
> +				  uchar **packetp)
> +{
> +	struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> +
> +	return _bcmgenet_gmac_eth_recv(priv, packetp);
> +}

Why those wrappers? Seems like it has been copied from some legacy or dual legacy/DM driver? Please just use one function. Applies to all other ops implementations.

> +
> +static void bcmgenet_adjust_link(struct bcmgenet_eth_priv *priv)
> +{
> +	struct phy_device *phy_dev = priv->phydev;
> +	u32 reg = 0, reg_rgmii;
> +
> +	switch (phy_dev->speed) {
> +	case SPEED_1000:
> +		reg = UMAC_SPEED_1000;
> +		break;
> +	case SPEED_100:
> +		reg = UMAC_SPEED_100;
> +		break;
> +	case SPEED_10:
> +		reg = UMAC_SPEED_10;
> +		break;
> +	}
> +
> +	reg <<= CMD_SPEED_SHIFT;
> +
> +	reg_rgmii = readl(priv->mac_reg + EXT_RGMII_OOB_CTRL);
> +	reg_rgmii &= ~OOB_DISABLE;
> +	reg_rgmii |= (RGMII_LINK | RGMII_MODE_EN | ID_MODE_DIS);
> +	writel(reg_rgmii, priv->mac_reg + EXT_RGMII_OOB_CTRL);
> +
> +	writel(reg, (priv->mac_reg + UMAC_CMD));

Those parentheses around the second argument are redundant. Please remove them, also in a lot of other places.

> +}
> +
> +static int _bcmgenet_eth_init(struct bcmgenet_eth_priv *priv, u8 *enetaddr)
> +{
> +	u32 reg, dma_ctrl;
> +
> +	priv->tx_desc_base = priv->mac_reg + 0x4000;
> +	priv->rx_desc_base = priv->mac_reg + 0x2000;
> +	priv->tx_index = 0x0;
> +	priv->rx_index = 0x0;
> +
> +	bcmgenet_umac_reset(priv);
> +
> +	init_umac(priv);
> +
> +	_bcmgenet_write_hwaddr(priv, enetaddr);
> +
> +	/* Disable RX/TX DMA and flush TX queues */
> +	dma_ctrl = bcmgenet_dma_disable(priv);
> +
> +	rx_ring_init(priv);
> +	rx_descs_init(priv);
> +
> +	tx_ring_init(priv);
> +
> +	/* Enable RX/TX DMA */
> +	bcmgenet_enable_dma(priv, dma_ctrl);
> +
> +	/* PHY Start Up, read PHY properties over the wire
> +	 * from generic PHY set-up
> +	 */
> +	phy_startup(priv->phydev);
> +
> +	/* Update MAC registers based on PHY property */
> +	bcmgenet_adjust_link(priv);
> +
> +	/* Enable Rx/Tx */
> +	reg = readl(priv->mac_reg + UMAC_CMD);
> +	reg |= (CMD_TX_EN | CMD_RX_EN);
> +	writel(reg, (priv->mac_reg + UMAC_CMD));
> +
> +	return 0;
> +}
> +
> +static int bcmgenet_gmac_eth_start(struct udevice *dev)
> +{
> +	struct eth_pdata *pdata = dev_get_platdata(dev);
> +
> +	return _bcmgenet_eth_init(dev->priv, pdata->enetaddr);
> +}
> +
> +static int bcmgenet_phy_init(struct bcmgenet_eth_priv *priv, void *dev)
> +{
> +	struct phy_device *phydev;
> +	int ret;
> +
> +	phydev = phy_connect(priv->bus, priv->phyaddr, dev, priv->interface);
> +	if (!phydev)
> +		return -ENODEV;
> +
> +	phydev->supported &= PHY_GBIT_FEATURES;
> +	if (priv->speed) {
> +		ret = phy_set_supported(priv->phydev, priv->speed);
> +		if (ret)
> +			return ret;
> +	}
> +	phydev->advertising = phydev->supported;
> +
> +	phy_connect_dev(phydev, dev);
> +
> +	priv->phydev = phydev;
> +	phy_config(priv->phydev);
> +
> +	return 0;
> +}
> +
> +static inline void bcmgenet_mdio_start(struct bcmgenet_eth_priv *priv)
> +{
> +	u32 reg;
> +
> +	reg = readl_relaxed(priv->mac_reg + MDIO_CMD);
> +	reg |= MDIO_START_BUSY;
> +	writel_relaxed(reg, priv->mac_reg + MDIO_CMD);
> +}
> +
> +static int bcmgenet_mdio_write(struct mii_dev *bus, int addr, int devad,
> +			       int reg, u16 value)
> +{
> +	struct udevice *dev = bus->priv;
> +	struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> +	u32 status, val;
> +	ulong start_time;
> +	ulong timeout_us = 20000;
> +
> +	start_time = timer_get_us();
> +
> +	/* Prepare the read operation */
> +	val = MDIO_WR | (addr << MDIO_PMD_SHIFT) |
> +		(reg << MDIO_REG_SHIFT) | (0xffff & value);
> +	writel_relaxed(val,  priv->mac_reg + MDIO_CMD);
> +
> +	/* Start MDIO transaction */
> +	bcmgenet_mdio_start(priv);
> +
> +	for (;;) {
> +		status = readl_relaxed(priv->mac_reg + MDIO_CMD);
> +		if (!(status & MDIO_START_BUSY))
> +			break;
> +		if (timeout_us > 0 && (timer_get_us() - start_time)
> +				>= timeout_us)
> +			return -ETIMEDOUT;
> +	}

Can't you use wait_for_bit_le32() here? Same for the other timeout functions.

> +
> +	return 0;
> +}
> +
> +static int bcmgenet_mdio_read(struct mii_dev *bus, int addr, int devad, int reg)
> +{
> +	struct udevice *dev = bus->priv;
> +	struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> +	u32 status, val;
> +	ulong start_time;
> +	ulong timeout_us =  20000;
> +
> +	start_time = timer_get_us();
> +
> +	/* Prepare the read operation */
> +	val = MDIO_RD | (addr << MDIO_PMD_SHIFT) | (reg << MDIO_REG_SHIFT);
> +	writel_relaxed(val, priv->mac_reg + MDIO_CMD);
> +
> +	/* Start MDIO transaction */
> +	bcmgenet_mdio_start(priv);
> +
> +	for (;;) {
> +		status = readl_relaxed(priv->mac_reg + MDIO_CMD);
> +		if (!(status & MDIO_START_BUSY))
> +			break;
> +		if (timeout_us > 0 && (timer_get_us() - start_time) >= timeout_us)
> +			return -ETIMEDOUT;
> +	}
> +
> +	val = readl_relaxed(priv->mac_reg + MDIO_CMD);
> +
> +	return val & 0xffff;
> +}
> +
> +static int bcmgenet_mdio_init(const char *name, struct udevice *priv)
> +{
> +	struct mii_dev *bus = mdio_alloc();
> +
> +	if (!bus) {
> +		debug("Failed to allocate MDIO bus\n");
> +		return -ENOMEM;
> +	}
> +
> +	bus->read = bcmgenet_mdio_read;
> +	bus->write = bcmgenet_mdio_write;
> +	snprintf(bus->name, sizeof(bus->name), name);
> +	bus->priv = (void *)priv;
> +
> +	return  mdio_register(bus);
> +}
> +
> +static int bcmgenet_interface_set(struct bcmgenet_eth_priv *priv)
> +{
> +	phy_interface_t phy_mode = priv->interface;
> +
> +	switch (phy_mode) {
> +	case PHY_INTERFACE_MODE_RGMII:
> +		writel(PORT_MODE_EXT_GPHY, priv->mac_reg + SYS_PORT_CTRL);
> +		break;
> +	default:
> +		printf("unknown phy mode: %d\n", priv->interface);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static int bcmgenet_eth_probe(struct udevice *dev)
> +{
> +	struct eth_pdata *pdata = dev_get_platdata(dev);
> +	struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> +	int offset = dev_of_offset(dev);
> +	const char *name;
> +	int reg;
> +	u8 major;
> +
> +	priv->mac_reg = (void *)pdata->iobase;
> +	priv->interface = pdata->phy_interface;
> +	priv->speed = pdata->max_speed;
> +
> +	/* Read GENET HW version */
> +	reg = readl_relaxed(priv->mac_reg + SYS_REV_CTRL);
> +	major = (reg >> 24 & 0x0f);
> +	if (major == 6)
> +		major = 5;
> +	else if (major == 5)
> +		major = 4;
> +	else if (major == 0)
> +		major = 1;
> +
> +	debug("GENET version is %1d.%1d EPHY: 0x%04x",
> +	      major, (reg >> 16) & 0x0f, reg & 0xffff);
> +
> +	bcmgenet_interface_set(priv);
> +
> +	offset = fdt_first_subnode(gd->fdt_blob, offset);
> +	name = fdt_get_name(gd->fdt_blob, offset, NULL);
> +
> +	bcmgenet_mdio_init(name, dev);
> +	priv->bus = miiphy_get_dev_by_name(name);
> +
> +	return bcmgenet_phy_init(priv, dev);
> +}
> +
> +static void bcmgenet_gmac_eth_stop(struct udevice *dev)
> +{
> +	struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> +	u32 reg, dma_ctrl;
> +
> +	reg = readl(priv->mac_reg + UMAC_CMD);
> +	reg &= ~(CMD_TX_EN | CMD_RX_EN);
> +	writel(reg, (priv->mac_reg + UMAC_CMD));
> +
> +	dma_ctrl = 1 << (DEFAULT_Q + DMA_RING_BUF_EN_SHIFT) | DMA_EN;
> +	reg = readl(priv->mac_reg + TDMA_REG_BASE + DMA_CTRL);
> +	reg &= ~dma_ctrl;
> +	writel(reg, (priv->mac_reg + TDMA_REG_BASE + DMA_CTRL));
> +}
> +
> +static int bcmgenet_gmac_free_pkt(struct udevice *dev, uchar *packet,
> +				  int length)
> +{
> +	struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> +
> +	return _bcmgenet_free_pkt(priv, length);
> +}
> +
> +static const struct eth_ops bcmgenet_gmac_eth_ops = {
> +	.start                  = bcmgenet_gmac_eth_start,
> +	.write_hwaddr           = bcmgenet_gmac_write_hwaddr,
> +	.send                   = bcmgenet_gmac_eth_send,
> +	.recv                   = bcmgenet_gmac_eth_recv,
> +	.free_pkt               = bcmgenet_gmac_free_pkt,
> +	.stop                   = bcmgenet_gmac_eth_stop,
> +};
> +
> +static int bcmgenet_eth_ofdata_to_platdata(struct udevice *dev)
> +{
> +	struct eth_pdata *pdata = dev_get_platdata(dev);
> +	struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> +	const char *phy_mode;
> +	int node = dev_of_offset(dev);
> +	int offset = 0;
> +
> +	pdata->iobase = (phys_addr_t)devfdt_get_addr(dev);
> +
> +	/* Get phy mode from DT */
> +	pdata->phy_interface = -1;
> +	phy_mode = fdt_getprop(gd->fdt_blob, dev_of_offset(dev), "phy-mode",
> +			       NULL);
> +
> +	if (phy_mode)
> +		pdata->phy_interface = phy_get_interface_by_name(phy_mode);
> +	if (pdata->phy_interface == -1) {
> +		debug("%s: Invalid PHY interface '%s'\n", __func__, phy_mode);
> +		return -EINVAL;
> +	}
> +
> +	offset = fdtdec_lookup_phandle(gd->fdt_blob, node, "phy-handle");
> +	if (offset > 0) {
> +		priv->phyaddr = fdtdec_get_int(gd->fdt_blob, offset, "reg", 0);
> +		pdata->max_speed = fdtdec_get_int(gd->fdt_blob, offset, "max-speed", 0);
> +	}
> +
> +	return 0;
> +}
> +
> +static const struct udevice_id bcmgenet_eth_ids[] = {
> +	{.compatible = "brcm,genet-v5"},

I think you need the new RPi4 specific compatible string here as well, to cope with mainline DTs.

That's it for my first pass. I will keep debugging this.

Cheers,
Andre.


> +	{ }
> +};
> +
> +U_BOOT_DRIVER(eth_bcmgenet) = {
> +	.name   = "eth_bcmgenet",
> +	.id     = UCLASS_ETH,
> +	.of_match = bcmgenet_eth_ids,
> +	.ofdata_to_platdata = bcmgenet_eth_ofdata_to_platdata,
> +	.probe  = bcmgenet_eth_probe,
> +	.ops    = &bcmgenet_gmac_eth_ops,
> +	.priv_auto_alloc_size = sizeof(struct bcmgenet_eth_priv),
> +	.platdata_auto_alloc_size = sizeof(struct eth_pdata),
> +	.flags = DM_FLAG_ALLOC_PRIV_DMA,
> +};



More information about the U-Boot mailing list