[RFC PATCH 2/2] net: Add Support for GENET Ethernet controller

Amit Tomer amittomer25 at gmail.com
Mon Dec 16 13:45:01 CET 2019


Hi,

Thanks for having the look.

>
> I am afraid that needs to be more elaborate. You could mention Broadcom, also that it's only for v5 of it. And that it's based on reverse engineering the Linux driver.
> Text in the cover letter will not make it into the repo, so you should mention this here again.

Ok, would take care of it next version.

> > Signed-off-by: Amit Singh Tomar <amittomer25 at gmail.com>
> > ---
> >  configs/rpi_4_defconfig |   2 +
> >  drivers/net/Kconfig     |   7 +
> >  drivers/net/Makefile    |   1 +
> >  drivers/net/bcmgenet.c  | 770 ++++++++++++++++++++++++++++++++++++++++++++++++
> >  4 files changed, 780 insertions(+)
> >  create mode 100644 drivers/net/bcmgenet.c
> >
> > diff --git a/configs/rpi_4_defconfig b/configs/rpi_4_defconfig
> > index 8cf1bb8..b0f9cf1 100644
> > --- a/configs/rpi_4_defconfig
> > +++ b/configs/rpi_4_defconfig
> > @@ -24,6 +24,8 @@ CONFIG_DM_KEYBOARD=y
> >  CONFIG_DM_MMC=y
> >  CONFIG_MMC_SDHCI=y
> >  CONFIG_MMC_SDHCI_BCM2835=y
> > +CONFIG_DM_ETH=y
>
> I am not sure that belongs into the individual defconfig file, I guess it should be in some Kconfig instead?
>
Ok, Would check it.

>
> > +
> >  config DWC_ETH_QOS
> >       bool "Synopsys DWC Ethernet QOS device support"
> >       depends on DM_ETH
> > diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> > index 3099183..6e0a688 100644
> > --- a/drivers/net/Makefile
> > +++ b/drivers/net/Makefile
> > @@ -8,6 +8,7 @@ obj-$(CONFIG_AG7XXX) += ag7xxx.o
> >  obj-$(CONFIG_ARMADA100_FEC) += armada100_fec.o

> Broadcom GENETv5.
>
> And you could tell a bit about the history here, that it's based on the Linux driver, but limited to support v5 and do away with all the priority queues, etc.
Ok.

> > + *
> > + */
> > +
> > +#include <asm/io.h>
> > +#include <clk.h>
> > +#include <cpu_func.h>
> > +#include <dm.h>
> > +#include <fdt_support.h>
> > +#include <linux/err.h>
> > +#include <malloc.h>
> > +#include <miiphy.h>
> > +#include <net.h>
> > +#include <dm/of_access.h>
> > +#include <dm/ofnode.h>
> > +#include <linux/libfdt.h>
> > +#include <linux/iopoll.h>
> > +#include <asm/dma-mapping.h>
> > +
> > +/* Register definitions derived from Linux source */
> > +#define SYS_REV_CTRL                  0x00
> > +
> > +#define SYS_PORT_CTRL                         0x04
> > +#define PORT_MODE_EXT_GPHY            3
> > +
> > +#define GENET_SYS_OFF                         0x0000
> > +#define SYS_RBUF_FLUSH_CTRL           (GENET_SYS_OFF  + 0x08)
> > +#define SYS_TBUF_FLUSH_CTRL           (GENET_SYS_OFF  + 0x0C)
> > +
> > +#define GENET_EXT_OFF                         0x0080
> > +#define EXT_RGMII_OOB_CTRL               (GENET_EXT_OFF + 0x0C)
> > +#define RGMII_MODE_EN_V123               BIT(0)
> > +#define RGMII_LINK                       BIT(4)
> > +#define OOB_DISABLE                      BIT(5)
> > +#define RGMII_MODE_EN                    BIT(6)
> > +#define ID_MODE_DIS                      BIT(16)
> > +
> > +#define GENET_RBUF_OFF                        0x0300
> > +#define RBUF_FLUSH_CTRL_V1            (GENET_RBUF_OFF + 0x04)
> > +#define RBUF_TBUF_SIZE_CTRL              (GENET_RBUF_OFF + 0xb4)
> > +#define RBUF_CTRL                        (GENET_RBUF_OFF + 0x00)
> > +#define RBUF_64B_EN                      BIT(0)
> > +#define RBUF_ALIGN_2B                    BIT(1)
> > +#define RBUF_BAD_DIS                     BIT(2)
>
> Thanks for aligning this - I guess it would be in an editor ;-)
> But please try to use tabs consistently for indenting this.
>
> > +
> > +#define GENET_UMAC_OFF                        0x0800
> > +#define UMAC_MIB_CTRL                         (GENET_UMAC_OFF + 0x580)
> > +#define UMAC_MAX_FRAME_LEN            (GENET_UMAC_OFF + 0x014)
> > +#define UMAC_MAC0                     (GENET_UMAC_OFF + 0x00C)
> > +#define UMAC_MAC1                     (GENET_UMAC_OFF + 0x010)
> > +#define UMAC_CMD                      (GENET_UMAC_OFF + 0x008)
> > +#define MDIO_CMD                      (GENET_UMAC_OFF + 0x614)
> > +#define UMAC_TX_FLUSH                         (GENET_UMAC_OFF + 0x334)
> > +#define MDIO_START_BUSY                  BIT(29)
> > +#define MDIO_READ_FAIL                   BIT(28)
> > +#define MDIO_RD                          (2 << 26)
> > +#define MDIO_WR                          BIT(26)
> > +#define MDIO_PMD_SHIFT                   21
> > +#define MDIO_PMD_MASK                    0x1F
> > +#define MDIO_REG_SHIFT                   16
> > +#define MDIO_REG_MASK                    0x1F
> > +
> > +#define CMD_TX_EN                     BIT(0)
> > +#define CMD_RX_EN                     BIT(1)
> > +#define UMAC_SPEED_10                         0
> > +#define UMAC_SPEED_100                        1
> > +#define UMAC_SPEED_1000                       2
> > +#define UMAC_SPEED_2500                       3
> > +#define CMD_SPEED_SHIFT                       2
> > +#define CMD_SPEED_MASK                        3
> > +#define CMD_SW_RESET                  BIT(13)
> > +#define CMD_LCL_LOOP_EN                       BIT(15)
> > +#define CMD_TX_EN                     BIT(0)
> > +#define CMD_RX_EN                     BIT(1)
> > +
> > +#define MIB_RESET_RX                  BIT(0)
> > +#define MIB_RESET_RUNT                        BIT(1)
> > +#define MIB_RESET_TX                  BIT(2)
> > +
> > +/* total number of Buffer Descriptors, same for Rx/Tx */
> > +#define TOTAL_DESC                    256
> > +
> > +#define DEFAULT_Q                        0x10
>
> I think that deserves a comment.
> Maybe Florian can comment on what this odd "queue 16" is really about? It seems weird to support *17* queues and make the last one the default.
>
> > +
> > +/* Body(1500) + EH_SIZE(14) + VLANTAG(4) + BRCMTAG(6) + FCS(4) = 1528.
> > + * 1536 is multiple of 256 bytes
> > + */
> > +#define ENET_BRCM_TAG_LEN             6
> > +#define ENET_PAD                      8
> > +#define ENET_MAX_MTU_SIZE             (ETH_DATA_LEN + ETH_HLEN + VLAN_HLEN + \
> > +                                       ENET_BRCM_TAG_LEN + ETH_FCS_LEN + ENET_PAD)
> > +
> > +/* Tx/Rx Dma Descriptor common bits*/
> > +#define DMA_EN                           BIT(0)
> > +#define DMA_RING_BUF_EN_SHIFT            0x01
> > +#define DMA_RING_BUF_EN_MASK             0xFFFF
> > +#define DMA_BUFLENGTH_MASK            0x0fff
> > +#define DMA_BUFLENGTH_SHIFT           16
> > +#define DMA_RING_SIZE_SHIFT           16
> > +#define DMA_OWN                               0x8000
> > +#define DMA_EOP                               0x4000
> > +#define DMA_SOP                               0x2000
> > +#define DMA_WRAP                      0x1000
> > +#define DMA_MAX_BURST_LENGTH             0x8
>
> I think it deserves mentioning that this is the RPi4 specific value, all the other GENET hardware can use 0x10 here. It looks to me like 0x8 is probably safe for all to use, it would just be less efficient. But I guess we don't care in U-Boot.

Ok, To be honest I have planned to add more documentation to this
driver as you earlier suggested that best place to have document it is
in the driver(it would also
help in future to read own code )

>
> > +/* Tx specific Dma descriptor bits */
> > +#define DMA_TX_UNDERRUN                       0x0200
> > +#define DMA_TX_APPEND_CRC             0x0040
> > +#define DMA_TX_OW_CRC                         0x0020
> > +#define DMA_TX_DO_CSUM                        0x0010
> > +#define DMA_TX_QTAG_SHIFT             7
> > +#define GENET_TDMA_REG_OFF            (0x4000 + \
> > +                                       TOTAL_DESC * DMA_DESC_SIZE)
> > +#define GENET_RDMA_REG_OFF            (0x2000 + \
> > +                                       TOTAL_DESC * DMA_DESC_SIZE)
> > +
> > +/* DMA rings size */
> > +#define DMA_RING_SIZE                         (0x40)
> > +#define DMA_RINGS_SIZE                        (DMA_RING_SIZE * (DEFAULT_Q + 1))
> > +
> > +/* DMA Descriptor */
> > +#define DMA_DESC_LENGTH_STATUS                0x00
> > +#define DMA_DESC_ADDRESS_LO           0x04
> > +#define DMA_DESC_ADDRESS_HI           0x08
> > +#define DMA_DESC_SIZE                    0xc
> > +
> > +#define DMA_FC_THRESH_HI              (TOTAL_DESC >> 4)
> > +#define DMA_FC_THRESH_LO              5
> > +#define DMA_XOFF_THRESHOLD_SHIFT      16
> > +
> > +#define TDMA_RING_REG_BASE(QUEUE_NUMBER) (GENET_TDMA_REG_OFF \
> > +                        + (DMA_RING_SIZE * (QUEUE_NUMBER)))
>
> So it seems like we are *always* using the default queue number. For improved readability we could drop this parameter and hardcode this here, so that users just need to use TDMA_RING_REG_BASE.

Yeah, this can be done, only thing if in future we would like to work
on some specific queue , keeping that in my mind I put it.

> > +#define TDMA_READ_PTR                    0x00
> > +#define TDMA_CONS_INDEX                  0x08
> > +#define TDMA_PROD_INDEX                  0x0C
> > +#define DMA_RING_BUF_SIZE                0x10
> > +#define DMA_START_ADDR                   0x14
> > +#define DMA_END_ADDR                     0x1C
> > +#define DMA_MBUF_DONE_THRESH             0x24
> > +#define TDMA_FLOW_PERIOD                 0x28
> > +#define TDMA_WRITE_PTR                   0x2C
> > +
> > +#define RDMA_RING_REG_BASE(QUEUE_NUMBER) (GENET_RDMA_REG_OFF \
> > +                        + (DMA_RING_SIZE * (QUEUE_NUMBER)))
> > +#define RDMA_WRITE_PTR                   TDMA_READ_PTR
> > +#define RDMA_READ_PTR                    TDMA_WRITE_PTR
> > +#define RDMA_PROD_INDEX                  TDMA_CONS_INDEX
> > +#define RDMA_CONS_INDEX                  TDMA_PROD_INDEX
> > +#define RDMA_XON_XOFF_THRESH             TDMA_FLOW_PERIOD
> > +
> > +#define TDMA_REG_BASE                         (GENET_TDMA_REG_OFF + DMA_RINGS_SIZE)
> > +#define RDMA_REG_BASE                         (GENET_RDMA_REG_OFF + DMA_RINGS_SIZE)
> > +#define DMA_RING_CFG                  0x0
> > +#define DMA_CTRL                      0x04
> > +#define DMA_SCB_BURST_SIZE            0x0C
> > +
> > +#define RX_BUF_LENGTH                         2048
> > +#define RX_TOTAL_BUFSIZE              (RX_BUF_LENGTH * TOTAL_DESC)
> > +#define RX_BUF_OFFSET                         2
> > +
> > +DECLARE_GLOBAL_DATA_PTR;
> > +
> > +struct bcmgenet_eth_priv {
> > +     void *mac_reg;
> > +     char rxbuffer[RX_TOTAL_BUFSIZE] __aligned(ARCH_DMA_MINALIGN);
>
> Making this the first member would probably help with the alignment.
Ok.
>
> > +     void *tx_desc_base;
> > +     void *rx_desc_base;
> > +     int tx_index;
> > +     int rx_index;
> > +     int c_index;
> > +     int phyaddr;
> > +     u32 interface;
> > +     u32 speed;
> > +     struct phy_device *phydev;
> > +     struct mii_dev *bus;
> > +};
> > +
> > +static void bcmgenet_umac_reset(struct bcmgenet_eth_priv *priv)
> > +{
> > +     u32 reg;
> > +
> > +     reg = readl(priv->mac_reg + SYS_RBUF_FLUSH_CTRL);
> > +     reg |= BIT(1);
> > +     writel(reg, (priv->mac_reg + SYS_RBUF_FLUSH_CTRL));
>
> Please use setbits_le32() and friends for those manipulations. This is much easier to read and less error-prone. This applies to all other bit manipulations below as well.

Sure.

> > +     udelay(10);
> > +
> > +     reg &= ~BIT(1);
> > +     writel(reg, (priv->mac_reg + SYS_RBUF_FLUSH_CTRL));
> > +     udelay(10);
> > +}
> > +
> > +static void reset_umac(struct bcmgenet_eth_priv *priv)
>
> What is the difference to the function above? The name is confusingly similar.
I think only function is needed, this is something I took my older
version of  Linux driver and is leftover
which is not needed.

Will check this again.
> > +{
> > +     writel(0, (priv->mac_reg + SYS_RBUF_FLUSH_CTRL));
> > +     udelay(10);
> > +
> > +     writel(0, priv->mac_reg + UMAC_CMD);
> > +
> > +     writel(CMD_SW_RESET | CMD_LCL_LOOP_EN, priv->mac_reg + UMAC_CMD);
> > +     udelay(2);
> > +     writel(0, priv->mac_reg + UMAC_CMD);
> > +}
> > +
> > +static void init_umac(struct bcmgenet_eth_priv *priv)
> > +{
> > +     u32 reg;
> > +
> > +     reset_umac(priv);
> > +
> > +     /* clear tx/rx counter */
> > +     writel(MIB_RESET_RX | MIB_RESET_TX | MIB_RESET_RUNT,
> > +            priv->mac_reg + UMAC_MIB_CTRL);
> > +     writel(0, priv->mac_reg + UMAC_MIB_CTRL);
> > +
> > +     writel(ENET_MAX_MTU_SIZE, priv->mac_reg + UMAC_MAX_FRAME_LEN);
> > +
> > +     /* init rx registers, enable ip header optimization */
> > +     reg = readl(priv->mac_reg + RBUF_CTRL);
> > +     reg |= RBUF_ALIGN_2B;
> > +     writel(reg, (priv->mac_reg + RBUF_CTRL));
> > +
> > +     writel(1, (priv->mac_reg + RBUF_TBUF_SIZE_CTRL));
> > +}
> > +
> > +static int _bcmgenet_write_hwaddr(struct bcmgenet_eth_priv *priv,
> > +                               unsigned char *addr)
> > +{
> > +     writel_relaxed(((addr[0] << 24) | (addr[1] << 16) | (addr[2] << 8)
> > +                    | addr[3]), priv->mac_reg + UMAC_MAC0);
> > +     writel_relaxed((addr[4] << 8) | addr[5], priv->mac_reg + UMAC_MAC1);
> > +
> > +     return 0;
> > +}
>
> Why this function? Can't you just use the one below and put the actual code in there?

Actually , we are calling it after reset as well and is trigger from two places.
I would place it in single function and test it.

> > +
> > +static int bcmgenet_gmac_write_hwaddr(struct udevice *dev)
> > +{
> > +     struct eth_pdata *pdata = dev_get_platdata(dev);
> > +     struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> > +
> > +     return _bcmgenet_write_hwaddr(priv, pdata->enetaddr);
> > +}
> > +
> > +static u32 bcmgenet_dma_disable(struct bcmgenet_eth_priv *priv)
> > +{
> > +     u32 reg;
> > +     u32 dma_ctrl;
> > +
> > +     dma_ctrl = 1 << (DEFAULT_Q + DMA_RING_BUF_EN_SHIFT) | DMA_EN;
> > +     reg = readl(priv->mac_reg + TDMA_REG_BASE + DMA_CTRL);
> > +     reg &= ~dma_ctrl;
> > +     writel(reg, (priv->mac_reg + TDMA_REG_BASE + DMA_CTRL));
> > +
> > +     reg = readl(priv->mac_reg + RDMA_REG_BASE + DMA_CTRL);
> > +     reg &= ~dma_ctrl;
> > +     writel(reg, (priv->mac_reg + RDMA_REG_BASE + DMA_CTRL));
> > +
> > +     writel(1, priv->mac_reg + UMAC_TX_FLUSH);
> > +     udelay(10);
> > +     writel(0, priv->mac_reg + UMAC_TX_FLUSH);
> > +
> > +     return dma_ctrl;
> > +}
> > +
> > +static void bcmgenet_enable_dma(struct bcmgenet_eth_priv *priv, u32 dma_ctrl)
> > +{
> > +     u32 reg;
> > +
> > +     dma_ctrl |= (1 << (DEFAULT_Q + DMA_RING_BUF_EN_SHIFT));
> > +     dma_ctrl |= DMA_EN;
> > +
> > +     writel(dma_ctrl, priv->mac_reg + TDMA_REG_BASE + DMA_CTRL);
> > +
> > +     reg = readl(priv->mac_reg + RDMA_REG_BASE + DMA_CTRL);
> > +     reg |= dma_ctrl;
> > +     writel(reg, priv->mac_reg + RDMA_REG_BASE + DMA_CTRL);
> > +}
> > +
> > +static int _bcmgenet_gmac_eth_send(struct bcmgenet_eth_priv *priv, void *packet,
> > +                                int len)
> > +{
> > +     u32 size, len_stat, prod_index, cons;
> > +     u32 tries = 100;
> > +
> > +     void *desc_base = (priv->tx_desc_base + (priv->tx_index * DMA_DESC_SIZE));
> > +
> > +     len_stat = (len << DMA_BUFLENGTH_SHIFT) | (0x3F << DMA_TX_QTAG_SHIFT);
> > +     len_stat |= (DMA_TX_APPEND_CRC | DMA_SOP | DMA_EOP);
> > +
> > +     prod_index = readl(priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_PROD_INDEX);
> > +
> > +     size = roundup(len, ARCH_DMA_MINALIGN);
>
> I think more important than aligning the size is aligning the base.
> Not that it really matters architecturally ...
>
> > +     flush_dcache_range((ulong)packet, (ulong)packet + size);
> > +
> > +     /* Set-up packet for transmission */
> > +     writel(lower_32_bits((ulong)packet), (desc_base + DMA_DESC_ADDRESS_LO));
> > +     writel(upper_32_bits((ulong)packet), (desc_base + DMA_DESC_ADDRESS_HI));
> > +     writel(len_stat, (desc_base + DMA_DESC_LENGTH_STATUS));
> > +
> > +     /* Increment index and wrap-up */
> > +     priv->tx_index++;
> > +     if (!(priv->tx_index % TOTAL_DESC)) {
> > +             priv->tx_index = 0;
>
> This looks weird to read. I guess you just want to wrap around?
> Maybe:
>         if (++priv->tx_index >= TOTAL_DESC)
>                 ....
Ok, I would try it.
> > +     }
> > +
> > +     prod_index++;
> > +
> > +     /* Start Transmisson */
> > +     writel(prod_index, (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_PROD_INDEX));
> > +
> > +     do {
> > +             cons = readl(priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_CONS_INDEX);
> > +     } while ((cons & 0xffff) < prod_index && --tries);
> > +     if (!tries)
> > +             return -ETIMEDOUT;
> > +
> > +     return 0;
> > +}
> > +
> > +static int _bcmgenet_gmac_eth_recv(struct bcmgenet_eth_priv *priv, uchar **packetp)
> > +{
> > +     u32 len_stat;
> > +     u32 len;
> > +     u32 addr;
> > +     u32 length = 0;
> > +     void *desc_base = (priv->rx_desc_base + (priv->rx_index * DMA_DESC_SIZE));
> > +
> > +     len_stat = readl(desc_base + DMA_DESC_LENGTH_STATUS);
> > +
> > +     if (!(len_stat & DMA_OWN)) {
>
> I think it would be cleaner to negate this and just bail out early.
> But also: Is this bit safe to use? Does the MAC update this?
>
> > +             len  = readl(desc_base + DMA_DESC_LENGTH_STATUS);
> > +             addr = readl(desc_base + DMA_DESC_ADDRESS_LO);
> > +
> > +             length = (len >> DMA_BUFLENGTH_SHIFT) & DMA_BUFLENGTH_MASK;
> > +
> > +             invalidate_dcache_range((uintptr_t)addr,
> > +                                     (addr + RX_TOTAL_BUFSIZE));
>
> There is no need to invalidate *all* buffers, so just use RX_BUF_LENGTH.
> Actually it looks like we might need lose data (our indexes!), since you invalidate cache lines beyond the buffers.

Ok, I tried this and seen some performance improvement while tftp but
it didn't solve the issue of tftp larger files.
> > +
> > +             /*
> > +              * two dummy bytes are added for IP alignment, this can be
> > +              * avoided by not programming RBUF_ALIGN_2B bit in RBUF_CTRL
> > +              */
> > +             *packetp = (uchar *)(ulong)addr + RX_BUF_OFFSET;
> > +
> > +             return (length - RX_BUF_OFFSET);
> > +     }
> > +
> > +     return -EAGAIN;
> > +}
> > +
> > +static int _bcmgenet_free_pkt(struct bcmgenet_eth_priv *priv, int len)
> > +{
> > +     priv->c_index = (priv->c_index + 1) & 0xFFFF;
> > +     writel(priv->c_index,
> > +            priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + RDMA_CONS_INDEX);
> > +     udelay(1000);
>
> What is this udelay about?

If I don't use this, tftp never breaks and hashes are keep coming.But
since we have moved to polling prod/cons stuff
it is not required anymore.

> > +
> > +     priv->rx_index++;
> > +     if (!(priv->rx_index % TOTAL_DESC)) {
> > +             priv->rx_index = 0;
> > +     }
>
> Same comment for the wrap-around as above.
>
> > +
> > +     return 0;
> > +}
> > +
> > +static void rx_descs_init(struct bcmgenet_eth_priv *priv)
> > +{
> > +     char *rxbuffs = &priv->rxbuffer[0];
> > +     u32 len_stat, i;
> > +     void *desc_base = priv->rx_desc_base;
> > +
> > +     priv->c_index = 0;
> > +
> > +     len_stat = ((RX_BUF_LENGTH << DMA_BUFLENGTH_SHIFT) | DMA_OWN);
>
> Is setting DMA_OWN necessary? I had the impression this bit was read-only?

To be honest this actually works but comes with very poor performance
and noticed data rate in Kbs(100-400Kbs)
On the other hand if we use polling PROD/CONS , I see huge improvement
while tftp files.

> > +
> > +     for (i = 0; i < TOTAL_DESC; i++) {
> > +             writel(lower_32_bits((uintptr_t)&rxbuffs[i * RX_BUF_LENGTH]),
> > +                    (((desc_base + (i * DMA_DESC_SIZE)) + DMA_DESC_ADDRESS_LO)));
> > +             writel(upper_32_bits((uintptr_t)&rxbuffs[i * RX_BUF_LENGTH]),
> > +                    (((desc_base + (i * DMA_DESC_SIZE)) + DMA_DESC_ADDRESS_HI)));
> > +             writel(len_stat,
> > +                    ((desc_base + (i * DMA_DESC_SIZE) + DMA_DESC_LENGTH_STATUS)));
> > +     }
> > +}
> > +
> > +static void rx_ring_init(struct bcmgenet_eth_priv *priv)
> > +{
> > +     writel(DMA_MAX_BURST_LENGTH,
> > +            (priv->mac_reg + RDMA_REG_BASE + DMA_SCB_BURST_SIZE));
> > +     writel(0x0,
> > +            (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + DMA_START_ADDR));
> > +     writel(0x0,
> > +            (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + RDMA_READ_PTR));
> > +     writel(0x0,
> > +            (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + RDMA_WRITE_PTR));
> > +     writel((TOTAL_DESC * (DMA_DESC_SIZE - 1)),
>
> If I get the Linux code correctly, this is supposed to point at the last *byte* of the descriptors.

Thanks for stressing on this.Actually done some experiments with
DMA_END_ADDRESSES and found
that its three worlds per descriptor and counted in numbers.

This allowed to tftp Bigger files and I can now tftp Kernel Image file
of (19 Mb) with data transfer rate
of 4-5 MB/s.

> > +            (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + DMA_END_ADDR));
> > +     writel(0x0,
> > +            (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + RDMA_PROD_INDEX));
> > +     writel(0x0,
> > +            (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + RDMA_CONS_INDEX));
> > +     writel(((TOTAL_DESC << DMA_RING_SIZE_SHIFT) | RX_BUF_LENGTH),
> > +            (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + DMA_RING_BUF_SIZE));
> > +     writel(((DMA_FC_THRESH_LO << DMA_XOFF_THRESHOLD_SHIFT) | DMA_FC_THRESH_HI),
> > +            (priv->mac_reg + RDMA_RING_REG_BASE(DEFAULT_Q) + RDMA_XON_XOFF_THRESH));
> > +     writel((1 << DEFAULT_Q),
> > +            (priv->mac_reg + RDMA_REG_BASE + DMA_RING_CFG));
> > +}
> > +
> > +static void tx_ring_init(struct bcmgenet_eth_priv *priv)
> > +{
> > +     writel(DMA_MAX_BURST_LENGTH,
> > +            (priv->mac_reg + TDMA_REG_BASE + DMA_SCB_BURST_SIZE));
>
> I think would be better to include both offsets into one value, since you always seem to use them together. You can keep the base in the definition:
> #define DMA_SCB_BURST_SIZE      (TDMA_REG_BASE + 0x0c)

But this is used for both Rx and Tx , having two instances(one for rx
and one for tx) looks bit odd to me.
Shall , we keep this as it is ?

> > +     writel(0x0,
> > +            (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + DMA_START_ADDR));
>
> As mentioned above, the DEFAULT_Q parameter is redundant in our case (this deserves to be mentioned somewhere, btw.).
> So at the very least drop this parameter, but also consider to include the mandatory offset into the definition of DMA_START_ADDR, as above.
>
> > +     writel(0x0,
> > +            (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_READ_PTR));
> > +     writel(0x0,
> > +            (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_WRITE_PTR));
> > +     writel((TOTAL_DESC * (DMA_DESC_SIZE - 1)),
>
> Same as above, I don't think this is right.
Yeah :)

I wish some one has written Linux code this way.

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index d857df8..a967a7d 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2102,7 +2102,7 @@ static void bcmgenet_init_tx_ring(struct
bcmgenet_priv *priv,
                                  TDMA_READ_PTR);
        bcmgenet_tdma_ring_writel(priv, index, start_ptr * words_per_bd,
                                  TDMA_WRITE_PTR);
-       bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1,
+       bcmgenet_tdma_ring_writel(priv, index, (end_ptr * words_per_bd) - 1,
                                  DMA_END_ADDR);
 }

But yeah, its my stupidity not able to read operators precedence here.

>
> > +            (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + DMA_END_ADDR));
> > +     writel(0x0,
> > +            (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_PROD_INDEX));
> > +     writel(0x0,
> > +            (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_CONS_INDEX));
> > +     writel(0x1,
> > +            (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + DMA_MBUF_DONE_THRESH));
> > +     writel(0x0,
> > +            (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + TDMA_FLOW_PERIOD));
> > +     writel(((TOTAL_DESC << DMA_RING_SIZE_SHIFT) | RX_BUF_LENGTH),
> > +            (priv->mac_reg + TDMA_RING_REG_BASE(DEFAULT_Q) + DMA_RING_BUF_SIZE));
> > +     writel((1 << DEFAULT_Q),
> > +            (priv->mac_reg + TDMA_REG_BASE + DMA_RING_CFG));
> > +}
> > +
> > +static int bcmgenet_gmac_eth_send(struct udevice *dev, void *packet, int length)
> > +{
> > +     struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> > +
> > +     return _bcmgenet_gmac_eth_send(priv, packet, length);
> > +}
> > +
> > +static int bcmgenet_gmac_eth_recv(struct udevice *dev, int flags,
> > +                               uchar **packetp)
> > +{
> > +     struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> > +
> > +     return _bcmgenet_gmac_eth_recv(priv, packetp);
> > +}
>
> Why those wrappers? Seems like it has been copied from some legacy or dual legacy/DM driver? Please just use one function. Applies to all other ops implementations.

Ok. I would use single function for this.

> > +
> > +static void bcmgenet_adjust_link(struct bcmgenet_eth_priv *priv)
> > +{
> > +     struct phy_device *phy_dev = priv->phydev;
> > +     u32 reg = 0, reg_rgmii;
> > +
> > +     switch (phy_dev->speed) {
> > +     case SPEED_1000:
> > +             reg = UMAC_SPEED_1000;
> > +             break;
> > +     case SPEED_100:
> > +             reg = UMAC_SPEED_100;
> > +             break;
> > +     case SPEED_10:
> > +             reg = UMAC_SPEED_10;
> > +             break;
> > +     }
> > +
> > +     reg <<= CMD_SPEED_SHIFT;
> > +
> > +     reg_rgmii = readl(priv->mac_reg + EXT_RGMII_OOB_CTRL);
> > +     reg_rgmii &= ~OOB_DISABLE;
> > +     reg_rgmii |= (RGMII_LINK | RGMII_MODE_EN | ID_MODE_DIS);
> > +     writel(reg_rgmii, priv->mac_reg + EXT_RGMII_OOB_CTRL);
> > +
> > +     writel(reg, (priv->mac_reg + UMAC_CMD));
>
> Those parentheses around the second argument are redundant. Please remove them, also in a lot of other places.

Ok.
> > +}
> > +
> > +static int _bcmgenet_eth_init(struct bcmgenet_eth_priv *priv, u8 *enetaddr)
> > +{
> > +     u32 reg, dma_ctrl;
> > +
> > +     priv->tx_desc_base = priv->mac_reg + 0x4000;
> > +     priv->rx_desc_base = priv->mac_reg + 0x2000;
> > +     priv->tx_index = 0x0;
> > +     priv->rx_index = 0x0;
> > +
> > +     bcmgenet_umac_reset(priv);
> > +
> > +     init_umac(priv);
> > +
> > +     _bcmgenet_write_hwaddr(priv, enetaddr);
> > +
> > +     /* Disable RX/TX DMA and flush TX queues */
> > +     dma_ctrl = bcmgenet_dma_disable(priv);
> > +
> > +     rx_ring_init(priv);
> > +     rx_descs_init(priv);
> > +
> > +     tx_ring_init(priv);
> > +
> > +     /* Enable RX/TX DMA */
> > +     bcmgenet_enable_dma(priv, dma_ctrl);
> > +
> > +     /* PHY Start Up, read PHY properties over the wire
> > +      * from generic PHY set-up
> > +      */
> > +     phy_startup(priv->phydev);
> > +
> > +     /* Update MAC registers based on PHY property */
> > +     bcmgenet_adjust_link(priv);
> > +
> > +     /* Enable Rx/Tx */
> > +     reg = readl(priv->mac_reg + UMAC_CMD);
> > +     reg |= (CMD_TX_EN | CMD_RX_EN);
> > +     writel(reg, (priv->mac_reg + UMAC_CMD));
> > +
> > +     return 0;
> > +}
> > +
> > +static int bcmgenet_gmac_eth_start(struct udevice *dev)
> > +{
> > +     struct eth_pdata *pdata = dev_get_platdata(dev);
> > +
> > +     return _bcmgenet_eth_init(dev->priv, pdata->enetaddr);
> > +}
> > +
> > +static int bcmgenet_phy_init(struct bcmgenet_eth_priv *priv, void *dev)
> > +{
> > +     struct phy_device *phydev;
> > +     int ret;
> > +
> > +     phydev = phy_connect(priv->bus, priv->phyaddr, dev, priv->interface);
> > +     if (!phydev)
> > +             return -ENODEV;
> > +
> > +     phydev->supported &= PHY_GBIT_FEATURES;
> > +     if (priv->speed) {
> > +             ret = phy_set_supported(priv->phydev, priv->speed);
> > +             if (ret)
> > +                     return ret;
> > +     }
> > +     phydev->advertising = phydev->supported;
> > +
> > +     phy_connect_dev(phydev, dev);
> > +
> > +     priv->phydev = phydev;
> > +     phy_config(priv->phydev);
> > +
> > +     return 0;
> > +}
> > +
> > +static inline void bcmgenet_mdio_start(struct bcmgenet_eth_priv *priv)
> > +{
> > +     u32 reg;
> > +
> > +     reg = readl_relaxed(priv->mac_reg + MDIO_CMD);
> > +     reg |= MDIO_START_BUSY;
> > +     writel_relaxed(reg, priv->mac_reg + MDIO_CMD);
> > +}
> > +
> > +static int bcmgenet_mdio_write(struct mii_dev *bus, int addr, int devad,
> > +                            int reg, u16 value)
> > +{
> > +     struct udevice *dev = bus->priv;
> > +     struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> > +     u32 status, val;
> > +     ulong start_time;
> > +     ulong timeout_us = 20000;
> > +
> > +     start_time = timer_get_us();
> > +
> > +     /* Prepare the read operation */
> > +     val = MDIO_WR | (addr << MDIO_PMD_SHIFT) |
> > +             (reg << MDIO_REG_SHIFT) | (0xffff & value);
> > +     writel_relaxed(val,  priv->mac_reg + MDIO_CMD);
> > +
> > +     /* Start MDIO transaction */
> > +     bcmgenet_mdio_start(priv);
> > +
> > +     for (;;) {
> > +             status = readl_relaxed(priv->mac_reg + MDIO_CMD);
> > +             if (!(status & MDIO_START_BUSY))
> > +                     break;
> > +             if (timeout_us > 0 && (timer_get_us() - start_time)
> > +                             >= timeout_us)
> > +                     return -ETIMEDOUT;
> > +     }
>
> Can't you use wait_for_bit_le32() here? Same for the other timeout functions.

Ok, will do it.

>> > +
> > +     return 0;
> > +}
> > +
> > +static int bcmgenet_mdio_read(struct mii_dev *bus, int addr, int devad, int reg)
> > +{
> > +     struct udevice *dev = bus->priv;
> > +     struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> > +     u32 status, val;
> > +     ulong start_time;
> > +     ulong timeout_us =  20000;
> > +
> > +     start_time = timer_get_us();
> > +
> > +     /* Prepare the read operation */
> > +     val = MDIO_RD | (addr << MDIO_PMD_SHIFT) | (reg << MDIO_REG_SHIFT);
> > +     writel_relaxed(val, priv->mac_reg + MDIO_CMD);
> > +
> > +     /* Start MDIO transaction */
> > +     bcmgenet_mdio_start(priv);
> > +
> > +     for (;;) {
> > +             status = readl_relaxed(priv->mac_reg + MDIO_CMD);
> > +             if (!(status & MDIO_START_BUSY))
> > +                     break;
> > +             if (timeout_us > 0 && (timer_get_us() - start_time) >= timeout_us)
> > +                     return -ETIMEDOUT;
> > +     }
> > +
> > +     val = readl_relaxed(priv->mac_reg + MDIO_CMD);
> > +
> > +     return val & 0xffff;
> > +}
> > +
> > +static int bcmgenet_mdio_init(const char *name, struct udevice *priv)
> > +{
> > +     struct mii_dev *bus = mdio_alloc();
> > +
> > +     if (!bus) {
> > +             debug("Failed to allocate MDIO bus\n");
> > +             return -ENOMEM;
> > +     }
> > +
> > +     bus->read = bcmgenet_mdio_read;
> > +     bus->write = bcmgenet_mdio_write;
> > +     snprintf(bus->name, sizeof(bus->name), name);
> > +     bus->priv = (void *)priv;
> > +
> > +     return  mdio_register(bus);
> > +}
> > +
> > +static int bcmgenet_interface_set(struct bcmgenet_eth_priv *priv)
> > +{
> > +     phy_interface_t phy_mode = priv->interface;
> > +
> > +     switch (phy_mode) {
> > +     case PHY_INTERFACE_MODE_RGMII:
> > +             writel(PORT_MODE_EXT_GPHY, priv->mac_reg + SYS_PORT_CTRL);
> > +             break;
> > +     default:
> > +             printf("unknown phy mode: %d\n", priv->interface);
> > +             return -EINVAL;
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +static int bcmgenet_eth_probe(struct udevice *dev)
> > +{
> > +     struct eth_pdata *pdata = dev_get_platdata(dev);
> > +     struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> > +     int offset = dev_of_offset(dev);
> > +     const char *name;
> > +     int reg;
> > +     u8 major;
> > +
> > +     priv->mac_reg = (void *)pdata->iobase;
> > +     priv->interface = pdata->phy_interface;
> > +     priv->speed = pdata->max_speed;
> > +
> > +     /* Read GENET HW version */
> > +     reg = readl_relaxed(priv->mac_reg + SYS_REV_CTRL);
> > +     major = (reg >> 24 & 0x0f);
> > +     if (major == 6)
> > +             major = 5;
> > +     else if (major == 5)
> > +             major = 4;
> > +     else if (major == 0)
> > +             major = 1;
> > +
> > +     debug("GENET version is %1d.%1d EPHY: 0x%04x",
> > +           major, (reg >> 16) & 0x0f, reg & 0xffff);
> > +
> > +     bcmgenet_interface_set(priv);
> > +
> > +     offset = fdt_first_subnode(gd->fdt_blob, offset);
> > +     name = fdt_get_name(gd->fdt_blob, offset, NULL);
> > +
> > +     bcmgenet_mdio_init(name, dev);
> > +     priv->bus = miiphy_get_dev_by_name(name);
> > +
> > +     return bcmgenet_phy_init(priv, dev);
> > +}
> > +
> > +static void bcmgenet_gmac_eth_stop(struct udevice *dev)
> > +{
> > +     struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> > +     u32 reg, dma_ctrl;
> > +
> > +     reg = readl(priv->mac_reg + UMAC_CMD);
> > +     reg &= ~(CMD_TX_EN | CMD_RX_EN);
> > +     writel(reg, (priv->mac_reg + UMAC_CMD));
> > +
> > +     dma_ctrl = 1 << (DEFAULT_Q + DMA_RING_BUF_EN_SHIFT) | DMA_EN;
> > +     reg = readl(priv->mac_reg + TDMA_REG_BASE + DMA_CTRL);
> > +     reg &= ~dma_ctrl;
> > +     writel(reg, (priv->mac_reg + TDMA_REG_BASE + DMA_CTRL));
> > +}
> > +
> > +static int bcmgenet_gmac_free_pkt(struct udevice *dev, uchar *packet,
> > +                               int length)
> > +{
> > +     struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> > +
> > +     return _bcmgenet_free_pkt(priv, length);
> > +}
> > +
> > +static const struct eth_ops bcmgenet_gmac_eth_ops = {
> > +     .start                  = bcmgenet_gmac_eth_start,
> > +     .write_hwaddr           = bcmgenet_gmac_write_hwaddr,
> > +     .send                   = bcmgenet_gmac_eth_send,
> > +     .recv                   = bcmgenet_gmac_eth_recv,
> > +     .free_pkt               = bcmgenet_gmac_free_pkt,
> > +     .stop                   = bcmgenet_gmac_eth_stop,
> > +};
> > +
> > +static int bcmgenet_eth_ofdata_to_platdata(struct udevice *dev)
> > +{
> > +     struct eth_pdata *pdata = dev_get_platdata(dev);
> > +     struct bcmgenet_eth_priv *priv = dev_get_priv(dev);
> > +     const char *phy_mode;
> > +     int node = dev_of_offset(dev);
> > +     int offset = 0;
> > +
> > +     pdata->iobase = (phys_addr_t)devfdt_get_addr(dev);
> > +
> > +     /* Get phy mode from DT */
> > +     pdata->phy_interface = -1;
> > +     phy_mode = fdt_getprop(gd->fdt_blob, dev_of_offset(dev), "phy-mode",
> > +                            NULL);
> > +
> > +     if (phy_mode)
> > +             pdata->phy_interface = phy_get_interface_by_name(phy_mode);
> > +     if (pdata->phy_interface == -1) {
> > +             debug("%s: Invalid PHY interface '%s'\n", __func__, phy_mode);
> > +             return -EINVAL;
> > +     }
> > +
> > +     offset = fdtdec_lookup_phandle(gd->fdt_blob, node, "phy-handle");
> > +     if (offset > 0) {
> > +             priv->phyaddr = fdtdec_get_int(gd->fdt_blob, offset, "reg", 0);
> > +             pdata->max_speed = fdtdec_get_int(gd->fdt_blob, offset, "max-speed", 0);
> > +     }
> > +
> > +     return 0;
> > +}
> > +
> > +static const struct udevice_id bcmgenet_eth_ids[] = {
> > +     {.compatible = "brcm,genet-v5"},
>
> I think you need the new RPi4 specific compatible string here as well, to cope with mainline DTs.

Ok, I would check this.

Thanks
Amit.


More information about the U-Boot mailing list