[U-Boot] [PATCH v2 2/5] ehci-hcd: Boost transfer speed
Stefan Herbrechtsmeier
stefan at herbrechtsmeier.net
Fri Jul 20 13:37:37 CEST 2012
Am 20.07.2012 13:26, schrieb Benoît Thébaudeau:
> This patch takes advantage of the hardware EHCI qTD queuing mechanism to avoid
> software overhead and to make transfers as fast as possible.
>
> The only drawback is a call to memalign. However, this is fast compared to the
> transfer timings, and the heap size to allocate is small, e.g. a little bit more
> than 100 kB for a transfer length of 65535 packets of 512 bytes.
>
> Tested on i.MX25 and i.MX35. In my test conditions, the speedup was about 15x
> using page-aligned buffers, which is really appreciable when accessing large
> files.
>
> Signed-off-by: Benoît Thébaudeau <benoit.thebaudeau at advansee.com>
> Cc: Marek Vasut <marex at denx.de>
> Cc: Ilya Yanok <ilya.yanok at cogentembedded.com>
> Cc: Stefan Herbrechtsmeier <stefan at herbrechtsmeier.net>
> ---
> Changes for v2:
> - Use DIV_ROUND_UP to make code more readable.
>
> .../drivers/usb/host/ehci-hcd.c | 92 ++++++++++++++------
> 1 file changed, 63 insertions(+), 29 deletions(-)
>
> diff --git u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c
> index 5b3b906..cf9ab92 100644
> --- u-boot-usb-1b4bd0e.orig/drivers/usb/host/ehci-hcd.c
> +++ u-boot-usb-1b4bd0e/drivers/usb/host/ehci-hcd.c
> @@ -208,7 +208,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer,
> int length, struct devrequest *req)
> {
> ALLOC_ALIGN_BUFFER(struct QH, qh, 1, USB_DMA_MINALIGN);
> - ALLOC_ALIGN_BUFFER(struct qTD, qtd, 3, USB_DMA_MINALIGN);
> + struct qTD *qtd;
> + int qtd_count = 0;
> int qtd_counter = 0;
>
> volatile struct qTD *vtd;
> @@ -229,8 +230,23 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer,
> le16_to_cpu(req->value), le16_to_cpu(req->value),
> le16_to_cpu(req->index));
>
> + if (req != NULL) /* SETUP + ACK */
> + qtd_count += 1 + 1;
> + if (length > 0 || req == NULL) { /* buffer */
> + if ((uint32_t)buffer & 4095) /* page-unaligned */
> + qtd_count += DIV_ROUND_UP(((uint32_t)buffer & 4095) +
> + length, (QT_BUFFER_CNT - 1) * 4096);
> + else /* page-aligned */
> + qtd_count += DIV_ROUND_UP(length, QT_BUFFER_CNT * 4096);
> + }
> + qtd = memalign(USB_DMA_MINALIGN, qtd_count * sizeof(struct qTD));
> + if (qtd == NULL) {
> + printf("unable to allocate TDs\n");
> + return -1;
> + }
> +
> memset(qh, 0, sizeof(struct QH));
> - memset(qtd, 0, 3 * sizeof(*qtd));
> + memset(qtd, 0, qtd_count * sizeof(*qtd));
>
> toggle = usb_gettoggle(dev, usb_pipeendpoint(pipe), usb_pipeout(pipe));
>
> @@ -291,31 +307,46 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer,
> }
>
> if (length > 0 || req == NULL) {
> - /*
> - * Setup request qTD (3.5 in ehci-r10.pdf)
> - *
> - * qt_next ................ 03-00 H
> - * qt_altnext ............. 07-04 H
> - * qt_token ............... 0B-08 H
> - *
> - * [ buffer, buffer_hi ] loaded with "buffer".
> - */
> - qtd[qtd_counter].qt_next = cpu_to_hc32(QT_NEXT_TERMINATE);
> - qtd[qtd_counter].qt_altnext = cpu_to_hc32(QT_NEXT_TERMINATE);
> - token = (toggle << 31) |
> - (length << 16) |
> - ((req == NULL ? 1 : 0) << 15) |
> - (0 << 12) |
> - (3 << 10) |
> - ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0);
> - qtd[qtd_counter].qt_token = cpu_to_hc32(token);
> - if (ehci_td_buffer(&qtd[qtd_counter], buffer, length) != 0) {
> - printf("unable construct DATA td\n");
> - goto fail;
> - }
> - /* Update previous qTD! */
> - *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]);
> - tdp = &qtd[qtd_counter++].qt_next;
> + uint8_t *buf_ptr = buffer;
> + int left_length = length;
> +
> + do {
> + int xfr_bytes = min(left_length,
> + (QT_BUFFER_CNT * 4096 -
> + ((uint32_t)buf_ptr & 4095)) &
> + ~4095);
Why do you align the length to 4096?
> +
> + /*
> + * Setup request qTD (3.5 in ehci-r10.pdf)
> + *
> + * qt_next ................ 03-00 H
> + * qt_altnext ............. 07-04 H
> + * qt_token ............... 0B-08 H
> + *
> + * [ buffer, buffer_hi ] loaded with "buffer".
> + */
> + qtd[qtd_counter].qt_next =
> + cpu_to_hc32(QT_NEXT_TERMINATE);
> + qtd[qtd_counter].qt_altnext =
> + cpu_to_hc32(QT_NEXT_TERMINATE);
> + token = (toggle << 31) |
> + (xfr_bytes << 16) |
> + ((req == NULL ? 1 : 0) << 15) |
> + (0 << 12) |
> + (3 << 10) |
> + ((usb_pipein(pipe) ? 1 : 0) << 8) | (0x80 << 0);
> + qtd[qtd_counter].qt_token = cpu_to_hc32(token);
> + if (ehci_td_buffer(&qtd[qtd_counter], buf_ptr,
> + xfr_bytes) != 0) {
> + printf("unable construct DATA td\n");
> + goto fail;
> + }
> + /* Update previous qTD! */
> + *tdp = cpu_to_hc32((uint32_t)&qtd[qtd_counter]);
> + tdp = &qtd[qtd_counter++].qt_next;
> + buf_ptr += xfr_bytes;
> + left_length -= xfr_bytes;
> + } while (left_length > 0);
> }
>
> if (req != NULL) {
> @@ -346,7 +377,8 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer,
> flush_dcache_range((uint32_t)qh_list,
> ALIGN_END_ADDR(struct QH, qh_list, 1));
> flush_dcache_range((uint32_t)qh, ALIGN_END_ADDR(struct QH, qh, 1));
> - flush_dcache_range((uint32_t)qtd, ALIGN_END_ADDR(struct qTD, qtd, 3));
> + flush_dcache_range((uint32_t)qtd,
> + ALIGN_END_ADDR(struct qTD, qtd, qtd_count));
>
> /* Set async. queue head pointer. */
> ehci_writel(&hcor->or_asynclistaddr, (uint32_t)qh_list);
> @@ -377,7 +409,7 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer,
> invalidate_dcache_range((uint32_t)qh,
> ALIGN_END_ADDR(struct QH, qh, 1));
> invalidate_dcache_range((uint32_t)qtd,
> - ALIGN_END_ADDR(struct qTD, qtd, 3));
> + ALIGN_END_ADDR(struct qTD, qtd, qtd_count));
>
> token = hc32_to_cpu(vtd->qt_token);
> if (!(token & 0x80))
> @@ -450,9 +482,11 @@ ehci_submit_async(struct usb_device *dev, unsigned long pipe, void *buffer,
> ehci_readl(&hcor->or_portsc[1]));
> }
>
> + free(qtd);
> return (dev->status != USB_ST_NOT_PROC) ? 0 : -1;
>
> fail:
> + free(qtd);
> return -1;
> }
>
More information about the U-Boot
mailing list