[PATCH] gunzip: Implement chunked decompression
Simon Glass
sjg at chromium.org
Wed Feb 4 01:22:50 CET 2026
Hi Marek,
On Wed, 28 Jan 2026 at 13:03, Marek Vasut
<marek.vasut+renesas at mailbox.org> wrote:
>
> The current gzwrite() implementation is limited to a 4 GiB compressed
> input buffer because the struct z_stream_s { uInt avail_in } member
> is of type unsigned int. The current gzwrite() implementation sets
> avail_in to the entire input buffer size and decompresses the whole
> compressed input buffer in one round, which limits the size of the
> input buffer to 4 GiB.
>
> Rework the decompression loop to use a chunked approach, and
> decompress the input buffer in avail_in chunks of up to 4 GiB - 1 kiB,
> possibly in multiple decompression rounds. This way, the compressed
> input buffer size is limited only by the type of the gzwrite() 'len'
> parameter, which is unsigned long.
>
> For sandbox builds, parse the 'gzwrite_chunk' environment variable,
> so the chunked approach can be thoroughly tested with non-default
> chunk sizes. For non-sandbox builds, the chunk size is fixed at
> 4 GiB - 1 kiB.
>
> The gzwrite test case is extended to test various chunk sizes during
> the gzwrite decompression test.
>
> Signed-off-by: Marek Vasut <marek.vasut+renesas at mailbox.org>
> ---
> Cc: Alexander Graf <agraf at csgraf.de>
> Cc: Heinrich Schuchardt <xypron.glpk at gmx.de>
> Cc: Ilias Apalodimas <ilias.apalodimas at linaro.org>
> Cc: Jerome Forissier <jerome at forissier.org>
> Cc: Mattijs Korpershoek <mkorpershoek at kernel.org>
> Cc: Neil Armstrong <neil.armstrong at linaro.org>
> Cc: Peng Fan <peng.fan at nxp.com>
> Cc: Quentin Schulz <quentin.schulz at cherry.de>
> Cc: Simon Glass <sjg at chromium.org>
> Cc: Tom Rini <trini at konsulko.com>
> Cc: Yuya Hamamachi <yuya.hamamachi.sx at renesas.com>
> Cc: u-boot at lists.denx.de
> ---
> This depends on multiple fixes and on the actual unit test for the gzwrite command:
> https://lore.kernel.org/u-boot/20260127235156.1503207-1-marek.vasut+renesas@mailbox.org/
> https://lore.kernel.org/u-boot/20260127235310.1503239-1-marek.vasut+renesas@mailbox.org/
> https://lore.kernel.org/u-boot/20260127235339.1503312-1-marek.vasut+renesas@mailbox.org/
> https://lore.kernel.org/u-boot/20260127235914.1503663-1-marek.vasut+renesas@mailbox.org/
> ---
> lib/gunzip.c | 77 +++++++++++++++++++++++++++++++++---------------
> test/cmd/unzip.c | 12 +++++++-
> 2 files changed, 64 insertions(+), 25 deletions(-)
>
> diff --git a/lib/gunzip.c b/lib/gunzip.c
> index d31bbb2ba03..0e4bca28a5e 100644
> --- a/lib/gunzip.c
> +++ b/lib/gunzip.c
> @@ -8,8 +8,10 @@
> #include <command.h>
> #include <console.h>
> #include <div64.h>
> +#include <env.h>
> #include <gzip.h>
> #include <image.h>
> +#include <linux/sizes.h>
> #include <malloc.h>
> #include <memalign.h>
> #include <u-boot/crc.h>
> @@ -124,7 +126,7 @@ void gzwrite_progress_finish(int returnval,
> int gzwrite(unsigned char *src, unsigned long len, struct blk_desc *dev,
> ulong szwritebuf, ulong startoffs, ulong szexpected)
> {
> - int i, flags;
> + int flags;
> z_stream s;
> int r = 0;
> unsigned char *writebuf;
> @@ -132,14 +134,23 @@ int gzwrite(unsigned char *src, unsigned long len, struct blk_desc *dev,
> ulong totalfilled = 0;
> lbaint_t blksperbuf, outblock;
> u32 expected_crc;
> - u32 payload_size;
> + unsigned long i, payload_size;
> + unsigned long blocks_written;
> + lbaint_t writeblocks;
> + int numfilled = 0;
> int iteration = 0;
> -
> - if (len > 0xffffffff) {
> - printf("%s: input size over 4 GiB in size not supported\n",
> - __func__);
> - return -1;
> - }
> + /*
> + * Allow runtime configuration of decompression chunk on
> + * sandbox to better cover the chunked decompression
> + * functionality without having to use > 4 GiB files.
> + */
> + const ulong minchunk = 0x400;
> + const ulong maxchunk = SZ_4G - minchunk;
> + const ulong chunk =
> + CONFIG_IS_ENABLED(SANDBOX,
> + (clamp(env_get_ulong("gzwrite_chunk", 10, maxchunk),
> + minchunk, maxchunk)),
> + (maxchunk));
>
> if (!szwritebuf ||
> (szwritebuf % dev->blksz) ||
> @@ -181,7 +192,7 @@ int gzwrite(unsigned char *src, unsigned long len, struct blk_desc *dev,
> return -1;
> }
>
> - payload_size = len - i - 8;
> + payload_size = len - i;
>
> memcpy(&expected_crc, src + len - 8, sizeof(expected_crc));
> expected_crc = le32_to_cpu(expected_crc);
> @@ -211,35 +222,44 @@ int gzwrite(unsigned char *src, unsigned long len, struct blk_desc *dev,
> return -1;
> }
>
> - s.next_in = src + i;
> - s.avail_in = payload_size+8;
> + src += i;
> + s.avail_in = 0;
> writebuf = (unsigned char *)malloc_cache_aligned(szwritebuf);
>
> /* decompress until deflate stream ends or end of file */
> do {
> if (s.avail_in == 0) {
> - printf("%s: weird termination with result %d\n",
> - __func__, r);
> - break;
> + if (payload_size == 0) {
> + printf("%s: weird termination with result %d\n",
> + __func__, r);
> + break;
> + }
> +
> + s.next_in = src;
> + s.avail_in = (payload_size > chunk) ? chunk : payload_size;
> + src += s.avail_in;
> + payload_size -= s.avail_in;
> }
>
> /* run inflate() on input until output buffer not full */
> do {
> - unsigned long blocks_written;
> - int numfilled;
> - lbaint_t writeblocks;
> -
> - s.avail_out = szwritebuf;
> - s.next_out = writebuf;
> + if (numfilled) {
> + s.avail_out = szwritebuf - numfilled;
> + s.next_out = writebuf + numfilled;
> + } else {
> + s.avail_out = szwritebuf;
> + s.next_out = writebuf;
> + }
> r = inflate(&s, Z_SYNC_FLUSH);
> if ((r != Z_OK) &&
> (r != Z_STREAM_END)) {
> printf("Error: inflate() returned %d\n", r);
> goto out;
> }
> + crc = crc32(crc, writebuf + numfilled,
> + szwritebuf - s.avail_out - numfilled);
> + totalfilled += szwritebuf - s.avail_out - numfilled;
> numfilled = szwritebuf - s.avail_out;
> - crc = crc32(crc, writebuf, numfilled);
> - totalfilled += numfilled;
> if (numfilled < szwritebuf) {
> writeblocks = (numfilled+dev->blksz-1)
> / dev->blksz;
> @@ -247,14 +267,17 @@ int gzwrite(unsigned char *src, unsigned long len, struct blk_desc *dev,
> dev->blksz-(numfilled%dev->blksz));
> } else {
> writeblocks = blksperbuf;
> + numfilled = 0;
> }
>
> gzwrite_progress(iteration++,
> totalfilled,
> szexpected);
> - blocks_written = blk_dwrite(dev, outblock,
> + if (!numfilled) {
> + blocks_written = blk_dwrite(dev, outblock,
> writeblocks, writebuf);
> - outblock += blocks_written;
> + outblock += blocks_written;
> + }
> if (ctrlc()) {
> puts("abort\n");
> goto out;
> @@ -264,6 +287,12 @@ int gzwrite(unsigned char *src, unsigned long len, struct blk_desc *dev,
> /* done when inflate() says it's done */
> } while (r != Z_STREAM_END);
>
> + if (numfilled) {
> + blocks_written = blk_dwrite(dev, outblock,
> + writeblocks, writebuf);
> + outblock += blocks_written;
> + }
> +
> if ((szexpected != totalfilled) ||
> (crc != expected_crc))
> r = -1;
> diff --git a/test/cmd/unzip.c b/test/cmd/unzip.c
> index 725fcf91458..7c54cc7a815 100644
> --- a/test/cmd/unzip.c
> +++ b/test/cmd/unzip.c
> @@ -100,7 +100,7 @@ static int dm_test_cmd_zip_gzwrite(struct unit_test_state *uts)
> struct blk_desc *mmc_dev_desc;
> struct udevice *dev;
> ofnode root, node;
> - int i, ret;
> + int i, j, ret;
>
> /* Enable the mmc9 node for this test */
> root = oftree_root(oftree_default());
> @@ -122,6 +122,16 @@ static int dm_test_cmd_zip_gzwrite(struct unit_test_state *uts)
> return ret;
> }
>
> + /* Test various decompression chunk sizes */
> + for (j = 0; j < ARRAY_SIZE(sizes); j++) {
> + env_set_ulong("gzwrite_chunk", sizes[j]);
> + for (i = 0; i < ARRAY_SIZE(sizes); i++) {
> + ret = do_test_cmd_zip_unzip(uts, sizes[i], true);
> + if (ret)
> + return ret;
> + }
> + }
> +
> return 0;
> }
> DM_TEST(dm_test_cmd_zip_gzwrite, UTF_CONSOLE);
> --
> 2.51.0
>
Rather than using an environment variable, you could create a variant of
gzwrite() which takes the chunk size as a parameter.
Regards,
Simon
More information about the U-Boot
mailing list