[U-Boot] [PATCH 1/3] net: defragment IP packets

Robin Getz rgetz at blackfin.uclinux.org
Fri Jul 31 01:15:16 CEST 2009


On Thu 30 Jul 2009 05:02, Alessandro Rubini pondered:
> The defragmenting code is enabled by CONFIG_IP_DEFRAG. The code
> is useful for TFTP transfers, so the static reassembly buffer is sized
> based on CONFIG_TFTP_MAXBLOCK (default is 16kB).
> 
> The packet buffer is used as an array of "hole" structures, acting as
> a double-linked list. Each new fragment can split a hole in two,
> reduce a hole or fill a hole. There is no support for a fragment
> overlapping two different holes (i.e., the new fragment is across an
> already-received fragment).
> 
> The code includes a number of suggestions by Robin Getz.
> 
> Signed-off-by: Alessandro Rubini <rubini at gnudd.com>
> ---
>  net/net.c |  172
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 files changed, 167 insertions(+), 5 deletions(-)
> 
> diff --git a/net/net.c b/net/net.c
> index 641c37c..be382dd 100644
> --- a/net/net.c
> +++ b/net/net.c
> @@ -1117,6 +1117,164 @@ static void CDPStart(void)
>  }
>  #endif
>  
> +#ifdef CONFIG_IP_DEFRAG
> +/*
> + * This function collects fragments in a single packet, according
> + * to the algorithm in RFC815. It returns NULL or the pointer to
> + * a complete packet, in static storage
> + */
> +#ifndef CONFIG_TFTP_MAXBLOCK
> +#define CONFIG_TFTP_MAXBLOCK 16384

It is more than tftp - nfs could also use the same.

How about CONFIG_NET_MAXDEFRAG instead?

> +#endif
> +#define IP_PAYLOAD (CONFIG_TFTP_MAXBLOCK + 4)
> +#define IP_PKTSIZE (IP_PAYLOAD + IP_HDR_SIZE_NO_UDP)
> +
> +/*
> + * this is the packet being assembled, either data or frag control.
> + * Fragments go by 8 bytes, so this union must be 8 bytes long
> + */
> +struct hole {
> +	/* first_byte is address of this structure */
> +	u16 last_byte;	/* last byte in this hole + 1 (begin of next hole) */
> +	u16 next_hole;	/* index of next (in 8-b blocks), 0 == none */
> +	u16 prev_hole;	/* index of prev, 0 == none */
> +	u16 unused;
> +};
> +
> +static IP_t *__NetDefragment(IP_t *ip, int *lenp)
> +{

I don't understand the purpose of lenp.

The calling function doesn't use the len variable, except for ICMP_ECHO_REQUEST
packets, which are not allowed to be fragmented.

I eliminated it - and suffered no side effects.

> +	static uchar pkt_buff[IP_PKTSIZE] __attribute__((aligned(PKTALIGN)));
> +	static u16 first_hole, total_len;
> +	struct hole *payload, *thisfrag, *h, *newh;
> +	IP_t *localip = (IP_t *)pkt_buff;
> +	uchar *indata = (uchar *)ip;
> +	int offset8, start, len, done = 0;
> +	u16 ip_off = ntohs(ip->ip_off);
> +
> +	/* payload starts after IP header, this fragment is in there */
> +	payload = (struct hole *)(pkt_buff + IP_HDR_SIZE_NO_UDP);
> +	offset8 =  (ip_off & IP_OFFS);
> +	thisfrag = payload + offset8;
> +	start = offset8 * 8;
> +	len = ntohs(ip->ip_len) - IP_HDR_SIZE_NO_UDP;
> +
> +	if (start + len > IP_PAYLOAD) /* fragment extends too far */
> +		return NULL;
> +
> +	if (!total_len || localip->ip_id != ip->ip_id) {
> +		/* new (or different) packet, reset structs */
> +		total_len = 0xffff;
> +		payload[0].last_byte = ~0;
> +		payload[0].next_hole = 0;
> +		payload[0].prev_hole = 0;
> +		first_hole = 0;
> +		/* any IP header will work, copy the first we received */
> +		memcpy(localip, ip, IP_HDR_SIZE_NO_UDP);
> +	}

I'm not sure that resetting and starting over when we lose a packet, or get a
bad one, is a great idea.

For some reason, when I'm ping flooding while tftping a large file (with a large
tftp block size), things hang. If I set the block size to under the MTU, it
works fine. Do you get the same?

I'm still poking to figure out why...

> +	/*
> +	 * What follows is the reassembly algorithm. We use the payload
> +	 * array as a linked list of hole descriptors, as each hole starts
> +	 * at a multiple of 8 bytes. However, last byte can be whatever value,
> +	 * so it is represented as byte count, not as 8-byte blocks.
> +	 */
> +
> +	h = payload + first_hole;
> +	while (h->last_byte < start) {
> +		if (!h->next_hole) {
> +			/* no hole that far away */
> +			return NULL;
> +		}
> +		h = payload + h->next_hole;
> +	}
> +
> +	if (offset8 + (len / 8) <= h - payload) {
> +		/* no overlap with holes (dup fragment?) */
> +		return NULL;
> +	}
> +
> +	if (!(ip_off & IP_FLAGS_MFRAG)) {
> +		/* no more fragments: truncate this (last) hole */
> +		total_len = start + len;
> +		h->last_byte = start + len;
> +	}
> +
> +	/*
> +	 * There is some overlap: fix the hole list. This code doesn't
> +	 * deal with a fragment that overlaps with two different holes
> +	 * (thus being a superset of a previously-received fragment).
> +	 */
> +
> +	if ( (h >= thisfrag) && (h->last_byte <= start + len) ) {
> +		/* complete overlap with hole: remove hole */
> +		if (!h->prev_hole && !h->next_hole) {
> +			/* last remaining hole */
> +			done = 1;
> +		} else if (!h->prev_hole) {
> +			/* first hole */
> +			first_hole = h->next_hole;
> +			payload[h->next_hole].prev_hole = 0;
> +		} else if (!h->next_hole) {
> +			/* last hole */
> +			payload[h->prev_hole].next_hole = 0;
> +		} else {
> +			/* in the middle of the list */
> +			payload[h->next_hole].prev_hole = h->prev_hole;
> +			payload[h->prev_hole].next_hole = h->next_hole;
> +		}
> +
> +	} else if (h->last_byte <= start + len) {
> +		/* overlaps with final part of the hole: shorten this hole */
> +		h->last_byte = start;
> +
> +	} else if (h >= thisfrag) {
> +		/* overlaps with initial part of the hole: move this hole */
> +		newh = thisfrag + (len / 8);
> +		*newh = *h;
> +		h = newh;
> +		if (h->next_hole)
> +			payload[h->next_hole].prev_hole = (h - payload);
> +		if (h->prev_hole)
> +			payload[h->prev_hole].next_hole = (h - payload);
> +		else
> +			first_hole = (h - payload);
> +
> +	} else {
> +		/* fragment sits in the middle: split the hole */
> +		newh = thisfrag + (len / 8);
> +		*newh = *h;
> +		h->last_byte = start;
> +		h->next_hole = (newh - payload);
> +		newh->prev_hole = (h - payload);
> +		if (newh->next_hole)
> +			payload[newh->next_hole].prev_hole = (newh - payload);
> +	}
> +
> +	/* finally copy this fragment and possibly return whole packet */
> +	memcpy((uchar *)thisfrag, indata + IP_HDR_SIZE_NO_UDP, len);
> +	if (!done)
> +		return NULL;
> +
> +	localip->ip_len = htons(total_len);
> +	*lenp = total_len + IP_HDR_SIZE_NO_UDP;
> +	return localip;
> +}
> +
> +static inline IP_t *NetDefragment(IP_t *ip, int *lenp)
> +{
> +	u16 ip_off = ntohs(ip->ip_off);
> +	if (!(ip_off & (IP_OFFS | IP_FLAGS_MFRAG)))
> +		return ip; /* not a fragment */
> +	return __NetDefragment(ip, lenp);
> +}
> +
> +#else /* !CONFIG_IP_DEFRAG */
> +
> +static inline IP_t *NetDefragment(IP_t *ip, int *lenp)
> +{
> +	return ip;
> +}
> +#endif

This stub needs to have the same check (ip_off & (IP_OFFS | IP_FLAGS_MFRAG)) as
the CONFIG_IP_DEFRAG version of NetDefragment() above. See comment below.

>  void
>  NetReceive(volatile uchar * inpkt, int len)
> @@ -1363,10 +1521,12 @@ NetReceive(volatile uchar * inpkt, int len)
>  #ifdef ET_DEBUG
>  		puts ("Got IP\n");
>  #endif
> +		/* Before we start poking the header, make sure it is there */
>  		if (len < IP_HDR_SIZE) {
>  			debug ("len bad %d < %lu\n", len, (ulong)IP_HDR_SIZE);
>  			return;
>  		}
> +		/* Check the packet length */
>  		if (len < ntohs(ip->ip_len)) {
>  			printf("len bad %d < %d\n", len,
> ntohs(ip->ip_len));
>  			return;
> @@ -1375,21 +1535,20 @@ NetReceive(volatile uchar * inpkt, int len)
>  #ifdef ET_DEBUG
>  		printf("len=%d, v=%02x\n", len, ip->ip_hl_v & 0xff);
>  #endif
> +		/* Can't deal with anything except IPv4 */
>  		if ((ip->ip_hl_v & 0xf0) != 0x40) {
>  			return;
>  		}
> -		/* Can't deal with fragments */
> -		if (ip->ip_off & htons(IP_OFFS | IP_FLAGS_MFRAG)) {
> -			return;
> -		}
> -		/* can't deal with headers > 20 bytes */
> +		/* Can't deal with IP options (headers != 20 bytes) */
>  		if ((ip->ip_hl_v & 0x0f) > 0x05) {
>  			return;
>  		}
> +		/* Check the Checksum of the header */
>  		if (!NetCksumOk((uchar *)ip, IP_HDR_SIZE_NO_UDP / 2)) {
>  			puts ("checksum bad\n");
>  			return;
>  		}
> +		/* If it is not for us, ignore it */
>  		tmp = NetReadIP(&ip->ip_dst);
>  		if (NetOurIP && tmp != NetOurIP && tmp != 0xFFFFFFFF) {
>  #ifdef CONFIG_MCAST_TFTP
> @@ -1397,6 +1556,9 @@ NetReceive(volatile uchar * inpkt, int len)
>  #endif
>  			return;
>  		}
> +		/* If we don't have a complete packet, drop it */
> +		if (!(ip = NetDefragment(ip, &len)))
> +			return;

This will break when CONFIG_IP_DEFRAG is not set: the stub just returns the
ip, and does not throw away fragmented packets, which it should do.

>  		/*
>  		 * watch for ICMP host redirects
>  		 *


More information about the U-Boot mailing list