[U-Boot] [PATCH v1 2/3] lib: rsa: generate additional parameters for public key

Wed Oct 9 05:30:43 UTC 2019

In the current implementation of FIT_SIGNATURE, five parameters for
a RSA public key are required while only two of them are essential.
(See rsa-mod-exp.h and uImage.FIT/signature.txt)
This is a result of considering relatively limited computer power
and resources on embedded systems, while such a assumption may not
be quite practical for other use cases.

In this patch, added is a function, rsa_gen_key_prop(), which will
generate additional parameters for other uses, in particular
UEFI secure boot, on the fly.

Note: the current code uses some "big number" routines from BearSSL
for the calculation.

Signed-off-by: AKASHI Takahiro <takahiro.akashi at linaro.org>
---
 include/u-boot/rsa-mod-exp.h |   3 +
 lib/rsa/Kconfig              |   7 +
 lib/rsa/Makefile             |   1 +
 lib/rsa/rsa-keyprop.c        | 585 +++++++++++++++++++++++++++++++++++
 4 files changed, 596 insertions(+)
 create mode 100644 lib/rsa/rsa-keyprop.c

diff --git a/include/u-boot/rsa-mod-exp.h b/include/u-boot/rsa-mod-exp.h
index 8a428c4b6a1a..ca189292d869 100644
--- a/include/u-boot/rsa-mod-exp.h
+++ b/include/u-boot/rsa-mod-exp.h
@@ -26,6 +26,9 @@ struct key_prop {
 	uint32_t exp_len;	/* Exponent length in number of uint8_t */
 };
 
+struct key_prop *rsa_gen_key_prop(const void *key, uint32_t keylen);
+void rsa_free_key_prop(struct key_prop *prop);
+
 /**
  * rsa_mod_exp_sw() - Perform RSA Modular Exponentiation in sw
  *
diff --git a/lib/rsa/Kconfig b/lib/rsa/Kconfig
index 62b7ab9c5e5c..d1743d7a4c47 100644
--- a/lib/rsa/Kconfig
+++ b/lib/rsa/Kconfig
@@ -30,6 +30,13 @@ config RSA_VERIFY
 	help
 	  Add RSA signature verification support.
 
+config RSA_VERIFY_WITH_PKEY
+	bool "Execute RSA verification without key parameters from FDT"
+	depends on RSA
+	help
+	  This options enables RSA signature verification without
+	  using public key parameters which is embedded control FDT.
+
 config RSA_SOFTWARE_EXP
 	bool "Enable driver for RSA Modular Exponentiation in software"
 	depends on DM
diff --git a/lib/rsa/Makefile b/lib/rsa/Makefile
index c07305188e0c..14ed3cb4012b 100644
--- a/lib/rsa/Makefile
+++ b/lib/rsa/Makefile
@@ -6,4 +6,5 @@
 # Wolfgang Denk, DENX Software Engineering, wd at denx.de.
 
 obj-$(CONFIG_$(SPL_)RSA_VERIFY) += rsa-verify.o rsa-checksum.o
+obj-$(CONFIG_RSA_VERIFY_WITH_PKEY) += rsa-keyprop.o
 obj-$(CONFIG_RSA_SOFTWARE_EXP) += rsa-mod-exp.o
diff --git a/lib/rsa/rsa-keyprop.c b/lib/rsa/rsa-keyprop.c
new file mode 100644
index 000000000000..d7d222e9bed9
--- /dev/null
+++ b/lib/rsa/rsa-keyprop.c
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0+ and MIT
+/*
+ * RSA library - generate parameters for a public key
+ *
+ * Copyright (c) 2019 Linaro Limited
+ * Author: AKASHI Takahiro
+ *
+ * Big number routines in this file come from BearSSL:
+ * Copyright (c) 2016 Thomas Pornin <pornin at bolet.org>
+ */
+
+#include <common.h>
+#include <image.h>
+#include <malloc.h>
+#include <asm/byteorder.h>
+#include <crypto/internal/rsa.h>
+#include <u-boot/rsa-mod-exp.h>
+
+static inline unsigned
+br_dec16be(const void *src)
+{
+	return be16_to_cpup(src);
+}
+
+static inline uint32_t
+br_dec32be(const void *src)
+{
+	return be32_to_cpup(src);
+}
+
+static inline void
+br_enc32be(void *dst, uint32_t x)
+{
+	__be32 tmp;
+
+	tmp = cpu_to_be32(x);
+	memcpy(dst, &tmp, sizeof(tmp));
+}
+
+/* stripped version of src/inner.h */
+
+static inline uint32_t
+NOT(uint32_t ctl)
+{
+	return ctl ^ 1;
+}
+
+static inline uint32_t
+MUX(uint32_t ctl, uint32_t x, uint32_t y)
+{
+	return y ^ (-ctl & (x ^ y));
+}
+
+static inline uint32_t
+EQ(uint32_t x, uint32_t y)
+{
+	uint32_t q;
+
+	q = x ^ y;
+	return NOT((q | -q) >> 31);
+}
+
+static inline uint32_t
+NEQ(uint32_t x, uint32_t y)
+{
+	uint32_t q;
+
+	q = x ^ y;
+	return (q | -q) >> 31;
+}
+
+static inline uint32_t
+GT(uint32_t x, uint32_t y)
+{
+	/*
+	 * If both x < 2^31 and y < 2^31, then y-x will have its high
+	 * bit set if x > y, cleared otherwise.
+	 *
+	 * If either x >= 2^31 or y >= 2^31 (but not both), then the
+	 * result is the high bit of x.
+	 *
+	 * If both x >= 2^31 and y >= 2^31, then we can virtually
+	 * subtract 2^31 from both, and we are back to the first case.
+	 * Since (y-2^31)-(x-2^31) = y-x, the subtraction is already
+	 * fine.
+	 */
+	uint32_t z;
+
+	z = y - x;
+	return (z ^ ((x ^ y) & (x ^ z))) >> 31;
+}
+
+static inline uint32_t
+BIT_LENGTH(uint32_t x)
+{
+	uint32_t k, c;
+
+	k = NEQ(x, 0);
+	c = GT(x, 0xFFFF); x = MUX(c, x >> 16, x); k += c << 4;
+	c = GT(x, 0x00FF); x = MUX(c, x >>  8, x); k += c << 3;
+	c = GT(x, 0x000F); x = MUX(c, x >>  4, x); k += c << 2;
+	c = GT(x, 0x0003); x = MUX(c, x >>  2, x); k += c << 1;
+	k += GT(x, 0x0001);
+	return k;
+}
+
+#define GE(x, y)   NOT(GT(y, x))
+#define LT(x, y)   GT(y, x)
+#define MUL(x, y)   ((uint64_t)(x) * (uint64_t)(y))
+
+static inline uint32_t
+br_i32_word(const uint32_t *a, uint32_t off)
+{
+	size_t u;
+	unsigned j;
+
+	u = (size_t)(off >> 5) + 1;
+	j = (unsigned)off & 31;
+	if (j == 0) {
+		return a[u];
+	} else {
+		return (a[u] >> j) | (a[u + 1] << (32 - j));
+	}
+}
+
+/* src/int/i32_bitlen.c */
+
+static uint32_t
+br_i32_bit_length(uint32_t *x, size_t xlen)
+{
+	uint32_t tw, twk;
+
+	tw = 0;
+	twk = 0;
+	while (xlen -- > 0) {
+		uint32_t w, c;
+
+		c = EQ(tw, 0);
+		w = x[xlen];
+		tw = MUX(c, w, tw);
+		twk = MUX(c, (uint32_t)xlen, twk);
+	}
+	return (twk << 5) + BIT_LENGTH(tw);
+}
+
+/* src/int/i32_decode.c */
+
+static void
+br_i32_decode(uint32_t *x, const void *src, size_t len)
+{
+	const unsigned char *buf;
+	size_t u, v;
+
+	buf = src;
+	u = len;
+	v = 1;
+	for (;;) {
+		if (u < 4) {
+			uint32_t w;
+
+			if (u < 2) {
+				if (u == 0) {
+					break;
+				} else {
+					w = buf[0];
+				}
+			} else {
+				if (u == 2) {
+					w = br_dec16be(buf);
+				} else {
+					w = ((uint32_t)buf[0] << 16)
+						| br_dec16be(buf + 1);
+				}
+			}
+			x[v ++] = w;
+			break;
+		} else {
+			u -= 4;
+			x[v ++] = br_dec32be(buf + u);
+		}
+	}
+	x[0] = br_i32_bit_length(x + 1, v - 1);
+}
+
+/* src/int/i32_encode.c */
+
+static void
+br_i32_encode(void *dst, size_t len, const uint32_t *x)
+{
+	unsigned char *buf;
+	size_t k;
+
+	buf = dst;
+
+	/*
+	 * Compute the announced size of x in bytes; extra bytes are
+	 * filled with zeros.
+	 */
+	k = (x[0] + 7) >> 3;
+	while (len > k) {
+		*buf ++ = 0;
+		len --;
+	}
+
+	/*
+	 * Now we use k as index within x[]. That index starts at 1;
+	 * we initialize it to the topmost complete word, and process
+	 * any remaining incomplete word.
+	 */
+	k = (len + 3) >> 2;
+	switch (len & 3) {
+	case 3:
+		*buf ++ = x[k] >> 16;
+		/* fall through */
+	case 2:
+		*buf ++ = x[k] >> 8;
+		/* fall through */
+	case 1:
+		*buf ++ = x[k];
+		k --;
+	}
+
+	/*
+	 * Encode all complete words.
+	 */
+	while (k > 0) {
+		br_enc32be(buf, x[k]);
+		k --;
+		buf += 4;
+	}
+}
+
+/* src/int/i32_ninv32.c */
+
+static uint32_t
+br_i32_ninv32(uint32_t x)
+{
+	uint32_t y;
+
+	y = 2 - x;
+	y *= 2 - y * x;
+	y *= 2 - y * x;
+	y *= 2 - y * x;
+	y *= 2 - y * x;
+	return MUX(x & 1, -y, 0);
+}
+
+/* src/int/i32_add.c */
+
+static uint32_t
+br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl)
+{
+	uint32_t cc;
+	size_t u, m;
+
+	cc = 0;
+	m = (a[0] + 63) >> 5;
+	for (u = 1; u < m; u ++) {
+		uint32_t aw, bw, naw;
+
+		aw = a[u];
+		bw = b[u];
+		naw = aw + bw + cc;
+
+		/*
+		 * Carry is 1 if naw < aw. Carry is also 1 if naw == aw
+		 * AND the carry was already 1.
+		 */
+		cc = (cc & EQ(naw, aw)) | LT(naw, aw);
+		a[u] = MUX(ctl, naw, aw);
+	}
+	return cc;
+}
+
+/* src/int/i32_sub.c */
+
+static uint32_t
+br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl)
+{
+	uint32_t cc;
+	size_t u, m;
+
+	cc = 0;
+	m = (a[0] + 63) >> 5;
+	for (u = 1; u < m; u ++) {
+		uint32_t aw, bw, naw;
+
+		aw = a[u];
+		bw = b[u];
+		naw = aw - bw - cc;
+
+		/*
+		 * Carry is 1 if naw > aw. Carry is 1 also if naw == aw
+		 * AND the carry was already 1.
+		 */
+		cc = (cc & EQ(naw, aw)) | GT(naw, aw);
+		a[u] = MUX(ctl, naw, aw);
+	}
+	return cc;
+}
+
+/* src/int/i32_div32.c */
+
+static uint32_t
+br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r)
+{
+	/* TODO: optimize this */
+	uint32_t q;
+	uint32_t ch, cf;
+	int k;
+
+	q = 0;
+	ch = EQ(hi, d);
+	hi = MUX(ch, 0, hi);
+	for (k = 31; k > 0; k --) {
+		int j;
+		uint32_t w, ctl, hi2, lo2;
+
+		j = 32 - k;
+		w = (hi << j) | (lo >> k);
+		ctl = GE(w, d) | (hi >> k);
+		hi2 = (w - d) >> j;
+		lo2 = lo - (d << k);
+		hi = MUX(ctl, hi2, hi);
+		lo = MUX(ctl, lo2, lo);
+		q |= ctl << k;
+	}
+	cf = GE(lo, d) | hi;
+	q |= cf;
+	*r = MUX(cf, lo - d, lo);
+	return q;
+}
+
+static inline uint32_t
+br_rem(uint32_t hi, uint32_t lo, uint32_t d)
+{
+	uint32_t r;
+
+	br_divrem(hi, lo, d, &r);
+	return r;
+}
+
+static inline uint32_t
+br_div(uint32_t hi, uint32_t lo, uint32_t d)
+{
+	uint32_t r;
+
+	return br_divrem(hi, lo, d, &r);
+}
+
+/* src/int/i32_muladd.c */
+
+static void
+br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m)
+{
+	uint32_t m_bitlen;
+	size_t u, mlen;
+	uint32_t a0, a1, b0, hi, g, q, tb;
+	uint32_t chf, clow, under, over;
+	uint64_t cc;
+
+	/*
+	 * We can test on the modulus bit length since we accept to
+	 * leak that length.
+	 */
+	m_bitlen = m[0];
+	if (m_bitlen == 0) {
+		return;
+	}
+	if (m_bitlen <= 32) {
+		x[1] = br_rem(x[1], z, m[1]);
+		return;
+	}
+	mlen = (m_bitlen + 31) >> 5;
+
+	/*
+	 * Principle: we estimate the quotient (x*2^32+z)/m by
+	 * doing a 64/32 division with the high words.
+	 *
+	 * Let:
+	 *   w = 2^32
+	 *   a = (w*a0 + a1) * w^N + a2
+	 *   b = b0 * w^N + b2
+	 * such that:
+	 *   0 <= a0 < w
+	 *   0 <= a1 < w
+	 *   0 <= a2 < w^N
+	 *   w/2 <= b0 < w
+	 *   0 <= b2 < w^N
+	 *   a < w*b
+	 * I.e. the two top words of a are a0:a1, the top word of b is
+	 * b0, we ensured that b0 is "full" (high bit set), and a is
+	 * such that the quotient q = a/b fits on one word (0 <= q < w).
+	 *
+	 * If a = b*q + r (with 0 <= r < q), we can estimate q by
+	 * doing an Euclidean division on the top words:
+	 *   a0*w+a1 = b0*u + v  (with 0 <= v < w)
+	 * Then the following holds:
+	 *   0 <= u <= w
+	 *   u-2 <= q <= u
+	 */
+	a0 = br_i32_word(x, m_bitlen - 32);
+	hi = x[mlen];
+	memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
+	x[1] = z;
+	a1 = br_i32_word(x, m_bitlen - 32);
+	b0 = br_i32_word(m, m_bitlen - 32);
+
+	/*
+	 * We estimate a divisor q. If the quotient returned by br_div()
+	 * is g:
+	 * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF.
+	 * -- Otherwise:
+	 *    -- if g == 0 then we set q = 0;
+	 *    -- otherwise, we set q = g - 1.
+	 * The properties described above then ensure that the true
+	 * quotient is q-1, q or q+1.
+	 */
+	g = br_div(a0, a1, b0);
+	q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1));
+
+	/*
+	 * We subtract q*m from x (with the extra high word of value 'hi').
+	 * Since q may be off by 1 (in either direction), we may have to
+	 * add or subtract m afterwards.
+	 *
+	 * The 'tb' flag will be true (1) at the end of the loop if the
+	 * result is greater than or equal to the modulus (not counting
+	 * 'hi' or the carry).
+	 */
+	cc = 0;
+	tb = 1;
+	for (u = 1; u <= mlen; u ++) {
+		uint32_t mw, zw, xw, nxw;
+		uint64_t zl;
+
+		mw = m[u];
+		zl = MUL(mw, q) + cc;
+		cc = (uint32_t)(zl >> 32);
+		zw = (uint32_t)zl;
+		xw = x[u];
+		nxw = xw - zw;
+		cc += (uint64_t)GT(nxw, xw);
+		x[u] = nxw;
+		tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw));
+	}
+
+	/*
+	 * If we underestimated q, then either cc < hi (one extra bit
+	 * beyond the top array word), or cc == hi and tb is true (no
+	 * extra bit, but the result is not lower than the modulus). In
+	 * these cases we must subtract m once.
+	 *
+	 * Otherwise, we may have overestimated, which will show as
+	 * cc > hi (thus a negative result). Correction is adding m once.
+	 */
+	chf = (uint32_t)(cc >> 32);
+	clow = (uint32_t)cc;
+	over = chf | GT(clow, hi);
+	under = ~over & (tb | (~chf & LT(clow, hi)));
+	br_i32_add(x, m, over);
+	br_i32_sub(x, m, under);
+}
+
+/* src/int/i32_reduce.c */
+
+static void
+br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m)
+{
+	uint32_t m_bitlen, a_bitlen;
+	size_t mlen, alen, u;
+
+	m_bitlen = m[0];
+	mlen = (m_bitlen + 31) >> 5;
+
+	x[0] = m_bitlen;
+	if (m_bitlen == 0) {
+		return;
+	}
+
+	/*
+	 * If the source is shorter, then simply copy all words from a[]
+	 * and zero out the upper words.
+	 */
+	a_bitlen = a[0];
+	alen = (a_bitlen + 31) >> 5;
+	if (a_bitlen < m_bitlen) {
+		memcpy(x + 1, a + 1, alen * sizeof *a);
+		for (u = alen; u < mlen; u ++) {
+			x[u + 1] = 0;
+		}
+		return;
+	}
+
+	/*
+	 * The source length is at least equal to that of the modulus.
+	 * We must thus copy N-1 words, and input the remaining words
+	 * one by one.
+	 */
+	memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a);
+	x[mlen] = 0;
+	for (u = 1 + alen - mlen; u > 0; u --) {
+		br_i32_muladd_small(x, a[u], m);
+	}
+}
+
+void rsa_free_key_prop(struct key_prop *prop)
+{
+	if (!prop)
+		return;
+
+	free((void *)prop->modulus);
+	free((void *)prop->public_exponent);
+	free((void *)prop->rr);
+
+	free(prop);
+}
+
+struct key_prop *rsa_gen_key_prop(const void *key, uint32_t keylen)
+{
+	struct key_prop *prop;
+	struct rsa_key rsa_key;
+#define BR_MAX_RSA_SIZE 4096
+	uint32_t *n, *rr, *rrtmp;
+	int rlen, i, ret;
+
+	prop = calloc(sizeof(*prop), 1);
+	if (!prop)
+		return NULL;
+	n = calloc(sizeof(uint32_t), 1 + (BR_MAX_RSA_SIZE >> 5));
+	rr = calloc(sizeof(uint32_t), 1 + (BR_MAX_RSA_SIZE >> 5));
+	rrtmp = calloc(sizeof(uint32_t), 1 + (BR_MAX_RSA_SIZE >> 5));
+	if (!n || !rr || !rrtmp)
+		return NULL;
+
+	ret = rsa_parse_pub_key(&rsa_key, key, keylen);
+	if (ret)
+		goto err;
+
+	/* modulus */
+	/* removing leading 0's */
+	for (i = 0; i < rsa_key.n_sz && !rsa_key.n[i]; i++)
+		;
+	prop->num_bits = (rsa_key.n_sz - i) * 8;
+	prop->modulus = malloc(rsa_key.n_sz - i);
+	if (!prop->modulus)
+		goto err;
+	memcpy((void *)prop->modulus, &rsa_key.n[i], rsa_key.n_sz - i);
+
+	/* exponent */
+	prop->public_exponent = calloc(1, sizeof(uint64_t));
+	if (!prop->public_exponent)
+		goto err;
+	memcpy((void *)prop->public_exponent + sizeof(uint64_t) - rsa_key.e_sz,
+	       rsa_key.e, rsa_key.e_sz);
+	prop->exp_len = rsa_key.e_sz;
+
+	/* n0 inverse */
+	br_i32_decode(n, &rsa_key.n[i], rsa_key.n_sz - i);
+	prop->n0inv = br_i32_ninv32(n[1]);
+
+	/* R^2 mod n; R = 2^(num_bits) */
+	rlen = prop->num_bits * 2; /* #bits of R^2 = (2^num_bits)^2 */
+	rr[0] = 0;
+	*(uint8_t *)&rr[0] = (1 << (rlen % 8));
+	for (i = 1; i < (((rlen + 31) >> 5) + 1); i++)
+		rr[i] = 0;
+	br_i32_decode(rrtmp, rr, ((rlen + 7) >> 3) + 1);
+	br_i32_reduce(rr, rrtmp, n);
+
+	rlen = (prop->num_bits + 7) >> 3; /* #bytes of R^2 mod n */
+	prop->rr = malloc(rlen);
+	if (!prop->rr)
+		goto err;
+	br_i32_encode((void *)prop->rr, rlen, rr);
+
+	return prop;
+
+err:
+	free(n);
+	free(rr);
+	free(rrtmp);
+	rsa_free_key_prop(prop);
+	return NULL;
+}
-- 
2.21.0