[U-Boot] [PATCH 2/4] net: fec_mxc: allow use with cache enabled

Marek Vasut marex at denx.de
Sat Mar 3 00:40:29 CET 2012


> Whoops.
> 
> Forgot to add the origin of this patch to the commit message:
> 	http://lists.denx.de/pipermail/u-boot/2012-February/117695.html
> 
> Thanks Marek.

Eric, I hope you won't mind if we respin this patch a few times to make sure 
nothing gets broken by this.

M

> 
> On 03/02/2012 04:06 PM, Eric Nelson wrote:
> > 	ensure that transmit and receive buffers are cache-line aligned
> > 	
> >          invalidate cache after each packet received
> >          flush cache before transmitting
> > 
> > Signed-off-by: Eric Nelson<eric.nelson at boundarydevices.com>
> > ---
> > 
> >   drivers/net/fec_mxc.c |  248
> >   ++++++++++++++++++++++++++++++++++++-------------
> >   drivers/net/fec_mxc.h |   19 +----
> >   2 files changed, 184 insertions(+), 83 deletions(-)
> > 
> > diff --git a/drivers/net/fec_mxc.c b/drivers/net/fec_mxc.c
> > index 1fdd071..f72304b 100644
> > --- a/drivers/net/fec_mxc.c
> > +++ b/drivers/net/fec_mxc.c
> > @@ -50,6 +50,33 @@ DECLARE_GLOBAL_DATA_PTR;
> > 
> >   #define	CONFIG_FEC_MXC_SWAP_PACKET
> >   #endif
> > 
> > +#ifndef	CONFIG_FEC_DESC_ALIGNMENT
> > +#define	CONFIG_FEC_DESC_ALIGNMENT	ARCH_DMA_MINALIGN
> > +#endif
> > +
> > +#ifndef	CONFIG_FEC_DATA_ALIGNMENT
> > +#define	CONFIG_FEC_DATA_ALIGNMENT	ARCH_DMA_MINALIGN
> > +#endif
> > +
> > +/* Check various alignment issues at compile time */
> > +#if ((CONFIG_FEC_DESC_ALIGNMENT<  16) || (CONFIG_FEC_DESC_ALIGNMENT % 16
> > != 0)) +#error	"CONFIG_FEC_DESC_ALIGNMENT must be multiple of 16!"
> > +#endif
> > +
> > +#if ((CONFIG_FEC_DATA_ALIGNMENT<  16) || (CONFIG_FEC_DATA_ALIGNMENT % 16
> > != 0)) +#error	"CONFIG_FEC_DATA_ALIGNMENT must be multiple of 16!"
> > +#endif
> > +
> > +#if ((PKTALIGN<  CONFIG_FEC_DATA_ALIGNMENT) || \
> > +	(PKTALIGN % CONFIG_FEC_DATA_ALIGNMENT != 0))
> > +#error	"PKTALIGN must be multiple of CONFIG_FEC_DATA_ALIGNMENT!"
> > +#endif
> > +
> > +#if ((PKTSIZE_ALIGN<  CONFIG_FEC_DATA_ALIGNMENT) || \
> > +	(PKTSIZE_ALIGN % CONFIG_FEC_DATA_ALIGNMENT != 0))
> > +#error	"PKTSIZE_ALIGN must be multiple of CONFIG_FEC_DATA_ALIGNMENT!"
> > +#endif
> > +
> > 
> >   #undef DEBUG
> >   
> >   struct nbuf {
> > 
> > @@ -259,43 +286,47 @@ static int fec_tx_task_disable(struct fec_priv
> > *fec)
> > 
> >    * Initialize receive task's buffer descriptors
> >    * @param[in] fec all we know about the device yet
> >    * @param[in] count receive buffer count to be allocated
> > 
> > - * @param[in] size size of each receive buffer
> > + * @param[in] dsize desired size of each receive buffer
> > 
> >    * @return 0 on success
> >    *
> >    * For this task we need additional memory for the data buffers. And
> >    each * data buffer requires some alignment. Thy must be aligned to a
> >    specific
> > 
> > - * boundary each (DB_DATA_ALIGNMENT).
> > + * boundary each.
> > 
> >    */
> > 
> > -static int fec_rbd_init(struct fec_priv *fec, int count, int size)
> > +static int fec_rbd_init(struct fec_priv *fec, int count, int dsize)
> > 
> >   {
> > 
> > -	int ix;
> > -	uint32_t p = 0;
> > -
> > -	/* reserve data memory and consider alignment */
> > -	if (fec->rdb_ptr == NULL)
> > -		fec->rdb_ptr = malloc(size * count + DB_DATA_ALIGNMENT);
> > -	p = (uint32_t)fec->rdb_ptr;
> > -	if (!p) {
> > -		puts("fec_mxc: not enough malloc memory\n");
> > -		return -ENOMEM;
> > -	}
> > -	memset((void *)p, 0, size * count + DB_DATA_ALIGNMENT);
> > -	p += DB_DATA_ALIGNMENT-1;
> > -	p&= ~(DB_DATA_ALIGNMENT-1);
> > -
> > -	for (ix = 0; ix<  count; ix++) {
> > -		writel(p,&fec->rbd_base[ix].data_pointer);
> > -		p += size;
> > -		writew(FEC_RBD_EMPTY,&fec->rbd_base[ix].status);
> > -		writew(0,&fec->rbd_base[ix].data_length);
> > -	}
> > +	uint32_t size;
> > +	int i;
> > +
> > 
> >   	/*
> > 
> > -	 * mark the last RBD to close the ring
> > +	 * Allocate memory for the buffers. This allocation respects the
> > +	 * alignment
> > 
> >   	 */
> > 
> > -	writew(FEC_RBD_WRAP | FEC_RBD_EMPTY,&fec->rbd_base[ix - 1].status);
> > +	size = roundup(dsize, CONFIG_FEC_DATA_ALIGNMENT);
> > +	for (i = 0; i<  count; i++) {
> > +		if (0 == fec->rbd_base[i].data_pointer) {
> > +			uint8_t *data = memalign(CONFIG_FEC_DATA_ALIGNMENT, 
size);
> > +			if (!data) {
> > +				printf("%s: error allocating rxbuf %d\n", 
__func__, i);
> > +				goto err;
> > +			}
> > +			fec->rbd_base[i].data_pointer = (uint32_t)data;
> > +		} // needs allocation
> > +		fec->rbd_base[i].status = FEC_RBD_EMPTY;
> > +		fec->rbd_base[i].data_length = 0;
> > +	}
> > +
> > +	/* Mark the last RBD to close the ring. */
> > +	fec->rbd_base[i - 1].status = FEC_RBD_EMPTY | FEC_RBD_WRAP;
> > 
> >   	fec->rbd_index = 0;
> >   	
> >   	return 0;
> > 
> > +
> > +err:
> > +	for (; i>= 0; i--)
> > +		free((uint8_t *)fec->rbd_base[i].data_pointer);
> > +
> > +	return -ENOMEM;
> > 
> >   }
> >   
> >   /**
> > 
> > @@ -312,8 +343,8 @@ static int fec_rbd_init(struct fec_priv *fec, int
> > count, int size)
> > 
> >    */
> >   
> >   static void fec_tbd_init(struct fec_priv *fec)
> >   {
> > 
> > -	writew(0x0000,&fec->tbd_base[0].status);
> > -	writew(FEC_TBD_WRAP,&fec->tbd_base[1].status);
> > +	fec->tbd_base[0].status = 0;
> > +	fec->tbd_base[1].status = FEC_TBD_WRAP;
> > 
> >   	fec->tbd_index = 0;
> >   
> >   }
> > 
> > @@ -387,12 +418,25 @@ static int fec_open(struct eth_device *edev)
> > 
> >   {
> >   
> >   	struct fec_priv *fec = (struct fec_priv *)edev->priv;
> >   	int speed;
> > 
> > +	uint32_t addr, size;
> > +	int i;
> > 
> >   	debug("fec_open: fec_open(dev)\n");
> >   	/* full-duplex, heartbeat disabled */
> >   	writel(1<<  2,&fec->eth->x_cntrl);
> >   	fec->rbd_index = 0;
> > 
> > +	/* Invalidate all descriptors */
> > +	for (i = 0; i<  FEC_RBD_NUM - 1; i++)
> > +		fec_rbd_clean(0,&fec->rbd_base[i]);
> > +	fec_rbd_clean(1,&fec->rbd_base[i]);
> > +
> > +	/* Flush the descriptors into RAM */
> > +	size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd),
> > +			CONFIG_FEC_DATA_ALIGNMENT);
> > +	addr = (uint32_t)&fec->rbd_base[0];
> > +	flush_dcache_range(addr, addr + size);
> > +
> > 
> >   #ifdef FEC_QUIRK_ENET_MAC
> >   
> >   	/* Enable ENET HW endian SWAP */
> >   	writel(readl(&fec->eth->ecntrl) | FEC_ECNTRL_DBSWAP,
> > 
> > @@ -478,38 +522,44 @@ static int fec_open(struct eth_device *edev)
> > 
> >   static int fec_init(struct eth_device *dev, bd_t* bd)
> >   {
> > 
> > -	uint32_t base;
> > 
> >   	struct fec_priv *fec = (struct fec_priv *)dev->priv;
> >   	uint32_t mib_ptr = (uint32_t)&fec->eth->rmon_t_drop;
> >   	uint32_t rcntrl;
> > 
> > -	int i;
> > +	uint32_t size;
> > +	int i, ret;
> > 
> >   	/* Initialize MAC address */
> >   	fec_set_hwaddr(dev);
> >   	
> >   	/*
> > 
> > -	 * reserve memory for both buffer descriptor chains at once
> > -	 * Datasheet forces the startaddress of each chain is 16 byte
> > -	 * aligned
> > +	 * Allocate transmit descriptors, there are two in total. This
> > +	 * allocation respects cache alignment.
> > 
> >   	 */
> > 
> > -	if (fec->base_ptr == NULL)
> > -		fec->base_ptr = malloc((2 + FEC_RBD_NUM) *
> > -				sizeof(struct fec_bd) + DB_ALIGNMENT);
> > -	base = (uint32_t)fec->base_ptr;
> > -	if (!base) {
> > -		puts("fec_mxc: not enough malloc memory\n");
> > -		return -ENOMEM;
> > +	if (!fec->tbd_base) {
> > +		size = roundup(2 * sizeof(struct fec_bd),
> > +				CONFIG_FEC_DATA_ALIGNMENT);
> > +		fec->tbd_base = memalign(CONFIG_FEC_DESC_ALIGNMENT, size);
> > +		if (!fec->tbd_base) {
> > +			ret = -ENOMEM;
> > +			goto err1;
> > +		}
> > +		memset(fec->tbd_base, 0, size);
> > 
> >   	}
> > 
> > -	memset((void *)base, 0, (2 + FEC_RBD_NUM) *
> > -			sizeof(struct fec_bd) + DB_ALIGNMENT);
> > -	base += (DB_ALIGNMENT-1);
> > -	base&= ~(DB_ALIGNMENT-1);
> > -
> > -	fec->rbd_base = (struct fec_bd *)base;
> > -
> > -	base += FEC_RBD_NUM * sizeof(struct fec_bd);
> > 
> > -	fec->tbd_base = (struct fec_bd *)base;
> > +	/*
> > +	 * Allocate receive descriptors. This allocation respects cache
> > +	 * alignment.
> > +	 */
> > +	if (!fec->rbd_base) {
> > +		size = roundup(FEC_RBD_NUM * sizeof(struct fec_bd),
> > +				CONFIG_FEC_DATA_ALIGNMENT);
> > +		fec->rbd_base = memalign(CONFIG_FEC_DESC_ALIGNMENT, size);
> > +		if (!fec->rbd_base) {
> > +			ret = -ENOMEM;
> > +			goto err2;
> > +		}
> > +		memset(fec->rbd_base, 0, size);
> > +	}
> > 
> >   	/*
> >   	
> >   	 * Set interrupt mask register
> > 
> > @@ -570,9 +620,8 @@ static int fec_init(struct eth_device *dev, bd_t* bd)
> > 
> >   	 * Initialize RxBD/TxBD rings
> >   	 */
> >   	
> >   	if (fec_rbd_init(fec, FEC_RBD_NUM, FEC_MAX_PKT_SIZE)<  0) {
> > 
> > -		free(fec->base_ptr);
> > -		fec->base_ptr = NULL;
> > -		return -ENOMEM;
> > +		ret = -ENOMEM;
> > +		goto err3;
> > 
> >   	}
> >   	fec_tbd_init(fec);
> > 
> > @@ -583,6 +632,13 @@ static int fec_init(struct eth_device *dev, bd_t*
> > bd)
> > 
> >   #endif
> >   
> >   	fec_open(dev);
> >   	return 0;
> > 
> > +
> > +err3:
> > +	free(fec->rbd_base);
> > +err2:
> > +	free(fec->tbd_base);
> > +err1:
> > +	return ret;
> > 
> >   }
> >   
> >   /**
> > 
> > @@ -631,9 +687,11 @@ static void fec_halt(struct eth_device *dev)
> > 
> >    * @param[in] length Data count in bytes
> >    * @return 0 on success
> >    */
> > 
> > -static int fec_send(struct eth_device *dev, volatile void* packet, int
> > length) +static int fec_send(struct eth_device *dev, volatile void
> > *packet, int length)
> > 
> >   {
> >   
> >   	unsigned int status;
> > 
> > +	uint32_t size;
> > +	uint32_t addr;
> > 
> >   	/*
> >   	
> >   	 * This routine transmits one frame.  This routine only accepts
> > 
> > @@ -650,15 +708,21 @@ static int fec_send(struct eth_device *dev,
> > volatile void* packet, int length)
> > 
> >   	}
> >   	
> >   	/*
> > 
> > -	 * Setup the transmit buffer
> > -	 * Note: We are always using the first buffer for transmission,
> > -	 * the second will be empty and only used to stop the DMA engine
> > +	 * Setup the transmit buffer. We are always using the first buffer for
> > +	 * transmission, the second will be empty and only used to stop the DMA
> > +	 * engine. We also flush the packet to RAM here to avoid cache trouble.
> > 
> >   	 */
> >   
> >   #ifdef	CONFIG_FEC_MXC_SWAP_PACKET
> >   
> >   	swap_packet((uint32_t *)packet, length);
> >   
> >   #endif
> > 
> > -	writew(length,&fec->tbd_base[fec->tbd_index].data_length);
> > -	writel((uint32_t)packet,&fec->tbd_base[fec->tbd_index].data_pointer);
> > +
> > +	addr = (uint32_t)packet;
> > +	size = roundup(length, CONFIG_FEC_DATA_ALIGNMENT);
> > +	flush_dcache_range(addr, addr + size);
> > +
> > +	fec->tbd_base[fec->tbd_index].data_length = length;
> > +	fec->tbd_base[fec->tbd_index].data_pointer = addr;
> > +
> > 
> >   	/*
> >   	
> >   	 * update BD's status now
> > 
> >   	 * This block:
> > @@ -667,9 +731,18 @@ static int fec_send(struct eth_device *dev, volatile
> > void* packet, int length)
> > 
> >   	 * - might be the last BD in the list, so the address counter should
> >   	 *   wrap (->  keep the WRAP flag)
> >   	 */
> > 
> > -	status = readw(&fec->tbd_base[fec->tbd_index].status)&  FEC_TBD_WRAP;
> > +	status = fec->tbd_base[fec->tbd_index].status&  FEC_TBD_WRAP;
> > 
> >   	status |= FEC_TBD_LAST | FEC_TBD_TC | FEC_TBD_READY;
> > 
> > -	writew(status,&fec->tbd_base[fec->tbd_index].status);
> > +	fec->tbd_base[fec->tbd_index].status = status;
> > +
> > +	/*
> > +	 * Flush data cache. This code flushes both TX descriptors to RAM.
> > +	 * After this code this code, the descritors will be safely in RAM
> > +	 * and we can start DMA.
> > +	 */
> > +	size = roundup(2 * sizeof(struct fec_bd), CONFIG_FEC_DATA_ALIGNMENT);
> > +	addr = (uint32_t)fec->tbd_base;
> > +	flush_dcache_range(addr, addr + size);
> > 
> >   	/*
> >   	
> >   	 * Enable SmartDMA transmit task
> > 
> > @@ -677,11 +750,16 @@ static int fec_send(struct eth_device *dev,
> > volatile void* packet, int length)
> > 
> >   	fec_tx_task_enable(fec);
> >   	
> >   	/*
> > 
> > -	 * wait until frame is sent .
> > +	 * Wait until frame is sent. On each turn of the wait cycle, we must
> > +	 * invalidate data cache to see what's really in RAM. Also, we need
> > +	 * barrier here.
> > 
> >   	 */
> > 
> > +	invalidate_dcache_range(addr, addr + size);
> > 
> >   	while (readw(&fec->tbd_base[fec->tbd_index].status)&  FEC_TBD_READY) {
> >   	
> >   		udelay(1);
> > 
> > +		invalidate_dcache_range(addr, addr + size);
> > 
> >   	}
> > 
> > +
> > 
> >   	debug("fec_send: status 0x%x index %d\n",
> >   	
> >   			readw(&fec->tbd_base[fec->tbd_index].status),
> >   			fec->tbd_index);
> > 
> > @@ -707,6 +785,8 @@ static int fec_recv(struct eth_device *dev)
> > 
> >   	int frame_length, len = 0;
> >   	struct nbuf *frame;
> >   	uint16_t bd_status;
> > 
> > +	uint32_t addr, size;
> > +	int i;
> > 
> >   	uchar buff[FEC_MAX_PKT_SIZE];
> >   	
> >   	/*
> > 
> > @@ -737,8 +817,23 @@ static int fec_recv(struct eth_device *dev)
> > 
> >   	}
> >   	
> >   	/*
> > 
> > -	 * ensure reading the right buffer status
> > +	 * Read the buffer status. Before the status can be read, the data
> > cache +	 * must be invalidated, because the data in RAM might have been
> > changed +	 * by DMA. The descriptors are properly aligned to cachelines
> > so there's +	 * no need to worry they'd overlap.
> > +	 *
> > +	 * WARNING: By invalidating the descriptor here, we also invalidate
> > +	 * the descriptors surrounding this one. Therefore we can NOT change
> > the +	 * contents of this descriptor nor the surrounding ones. The
> > problem is +	 * that in order to mark the descriptor as processed, we
> > need to change +	 * the descriptor. The solution is to mark the whole
> > cache line when all +	 * descriptors in the cache line are processed.
> > 
> >   	 */
> > 
> > +	addr = (uint32_t)rbd;
> > +	addr&= ~(CONFIG_FEC_DATA_ALIGNMENT - 1);
> > +	size = roundup(sizeof(struct fec_bd), CONFIG_FEC_DATA_ALIGNMENT);
> > +	invalidate_dcache_range(addr, addr + size);
> > +
> > 
> >   	bd_status = readw(&rbd->status);
> >   	debug("fec_recv: status 0x%x\n", bd_status);
> > 
> > @@ -751,6 +846,13 @@ static int fec_recv(struct eth_device *dev)
> > 
> >   			frame = (struct nbuf *)readl(&rbd->data_pointer);
> >   			frame_length = readw(&rbd->data_length) - 4;
> >   			/*
> > 
> > +			 * Invalidate data cache over the buffer
> > +			 */
> > +			addr = (uint32_t)frame;
> > +			size = roundup(frame_length, CONFIG_FEC_DATA_ALIGNMENT);
> > +			invalidate_dcache_range(addr, addr + size);
> > +
> > +			/*
> > 
> >   			 *  Fill the buffer and pass it to upper layers
> >   			 */
> >   
> >   #ifdef	CONFIG_FEC_MXC_SWAP_PACKET
> > 
> > @@ -765,11 +867,27 @@ static int fec_recv(struct eth_device *dev)
> > 
> >   						(ulong)rbd->data_pointer,
> >   						bd_status);
> >   		
> >   		}
> > 
> > +
> > 
> >   		/*
> > 
> > -		 * free the current buffer, restart the engine
> > -		 * and move forward to the next buffer
> > +		 * Free the current buffer, restart the engine and move forward
> > +		 * to the next buffer. Here we check if the whole cacheline of
> > +		 * descriptors was already processed and if so, we mark it free
> > +		 * as whole.
> > 
> >   		 */
> > 
> > -		fec_rbd_clean(fec->rbd_index == (FEC_RBD_NUM - 1) ? 1 : 0, rbd);
> > +		size = (CONFIG_FEC_DATA_ALIGNMENT / sizeof(struct fec_bd)) - 1;
> > +		if ((fec->rbd_index&  size) == size) {
> > +			i = fec->rbd_index - size;
> > +			addr = (uint32_t)&fec->rbd_base[i];
> > +			for (; i<= fec->rbd_index + size; i++) {
> > +				if (i == (FEC_RBD_NUM - 1))
> > +					fec_rbd_clean(1,&fec->rbd_base[i]);
> > +				else
> > +					fec_rbd_clean(0,&fec->rbd_base[i]);
> > +			}
> > +			flush_dcache_range(addr,
> > +				addr + CONFIG_FEC_DATA_ALIGNMENT);
> > +		}
> > +
> > 
> >   		fec_rx_task_enable(fec);
> >   		fec->rbd_index = (fec->rbd_index + 1) % FEC_RBD_NUM;
> >   	
> >   	}
> > 
> > diff --git a/drivers/net/fec_mxc.h b/drivers/net/fec_mxc.h
> > index 2eb7803..852b2e0 100644
> > --- a/drivers/net/fec_mxc.h
> > +++ b/drivers/net/fec_mxc.h
> > @@ -234,22 +234,6 @@ struct ethernet_regs {
> > 
> >   #endif
> >   
> >   /**
> > 
> > - * @brief Descriptor buffer alignment
> > - *
> > - * i.MX27 requires a 16 byte alignment (but for the first element only)
> > - */
> > -#define DB_ALIGNMENT		16
> > -
> > -/**
> > - * @brief Data buffer alignment
> > - *
> > - * i.MX27 requires a four byte alignment for transmit and 16 bits
> > - * alignment for receive so take 16
> > - * Note: Valid for member data_pointer in struct buffer_descriptor
> > - */
> > -#define DB_DATA_ALIGNMENT	16
> > -
> > -/**
> > 
> >    * @brief Receive&  Transmit Buffer Descriptor definitions
> >    *
> >    * Note: The first BD must be aligned (see DB_ALIGNMENT)
> > 
> > @@ -282,8 +266,7 @@ struct fec_priv {
> > 
> >   	struct fec_bd *tbd_base;	/* TBD ring */
> >   	int tbd_index;			/* next transmit BD to write */
> >   	bd_t *bd;
> > 
> > -	void *rdb_ptr;
> > -	void *base_ptr;
> > +	uint8_t *tdb_ptr;
> > 
> >   	int dev_id;
> >   	int phy_id;
> >   	struct mii_dev *bus;
> 
> _______________________________________________
> U-Boot mailing list
> U-Boot at lists.denx.de
> http://lists.denx.de/mailman/listinfo/u-boot


More information about the U-Boot mailing list