[PATCH] nand: Add a watch command
Miquel Raynal
miquel.raynal at bootlin.com
Tue Nov 28 11:56:11 CET 2023
This is a debug command to monitor the retention state of the data on
the array. The command needs a duplication of the mtd_read_oob()
function to actually return the maximum number of bitflips encountered
while reading the page. We could write a specific implementation for the
Sunxi driver but this is probably enough.
nand watch <off> <size> - check an area for bitflips
nand watch.part <part> - check a partition for bitflips
nand watch.chip - check the whole device for bitflips
The output may be a bit verbose and could look like:
=> nand watch.chip
device 0 whole chip
size adjusted to 0xff60000 (5 bad blocks)
NAND watch for bitflips in area 0x0-0xff60000:
Page 0 (0x00000000) -> error -74
Page 1 (0x00000800) -> error -74
Page 2 (0x00001000) -> error -74
Page 3 (0x00001800) -> error -74
Page 4 (0x00002000) -> error -74
Page 5 (0x00002800) -> error -74
Page 6 (0x00003000) -> error -74
Page 7 (0x00003800) -> error -74
Page 8 (0x00004000) -> error -74
Page 9 (0x00004800) -> error -74
Page 10 (0x00005000) -> error -74
Page 11 (0x00005800) -> error -74
Page 12 (0x00006000) -> error -74
Page 13 (0x00006800) -> error -74
Page 14 (0x00007000) -> error -74
Page 15 (0x00007800) -> error -74
Page 16 (0x00008000) -> error -74
Page 17 (0x00008800) -> error -74
Page 18 (0x00009000) -> error -74
Page 19 (0x00009800) -> error -74
Page 20 (0x0000a000) -> error -74
Page 21 (0x0000a800) -> error -74
Page 22 (0x0000b000) -> error -74
Page 23 (0x0000b800) -> error -74
Page 1110 (0x0022b000) -> up to 1 bf/chunk
Page 1122 (0x00231000) -> up to 1 bf/chunk
Page 1132 (0x00236000) -> up to 1 bf/chunk
Page 1362 (0x002a9000) -> up to 1 bf/chunk
Page 4990 (0x009bf000) -> up to 1 bf/chunk
Page 5728 (0x00b30000) -> up to 1 bf/chunk
Page 7116 (0x00de6000) -> up to 1 bf/chunk
Page 7160 (0x00dfc000) -> up to 1 bf/chunk
Page 7494 (0x00ea3000) -> up to 1 bf/chunk
Page 10842 (0x0152d000) -> up to 1 bf/chunk
Page 11614 (0x016af000) -> up to 1 bf/chunk
Page 11970 (0x01761000) -> up to 1 bf/chunk
Page 12536 (0x0187c000) -> up to 1 bf/chunk
Page 12687 (0x018c7800) -> up to 1 bf/chunk
Page 14298 (0x01bed000) -> up to 1 bf/chunk
Page 18268 (0x023ae000) -> up to 1 bf/chunk
Page 18760 (0x024a4000) -> up to 1 bf/chunk
Page 21440 (0x029e0000) -> up to 1 bf/chunk
Page 22336 (0x02ba0000) -> up to 1 bf/chunk
Page 22592 (0x02c20000) -> up to 1 bf/chunk
Page 23872 (0x02ea0000) -> up to 1 bf/chunk
Page 27584 (0x035e0000) -> up to 1 bf/chunk
Page 35008 (0x04460000) -> up to 1 bf/chunk
Page 37184 (0x048a0000) -> up to 1 bf/chunk
Page 41728 (0x05180000) -> up to 1 bf/chunk
Page 42176 (0x05260000) -> up to 1 bf/chunk
Page 43200 (0x05460000) -> up to 1 bf/chunk
Page 43328 (0x054a0000) -> up to 1 bf/chunk
Page 45376 (0x058a0000) -> up to 1 bf/chunk
Page 47040 (0x05be0000) -> up to 1 bf/chunk
Page 47552 (0x05ce0000) -> up to 1 bf/chunk
Page 49344 (0x06060000) -> up to 1 bf/chunk
Page 49856 (0x06160000) -> up to 1 bf/chunk
Page 62784 (0x07aa0000) -> up to 1 bf/chunk
Page 65153 (0x07f40800) -> up to 1 bf/chunk
Page 65228 (0x07f66000) -> up to 1 bf/chunk
Page 65382 (0x07fb3000) -> up to 1 bf/chunk
Page 98624 (0x0c0a0000) -> up to 1 bf/chunk
Page 101952 (0x0c720000) -> up to 1 bf/chunk
Page 107584 (0x0d220000) -> up to 1 bf/chunk
Page 118208 (0x0e6e0000) -> up to 1 bf/chunk
Page 126656 (0x0f760000) -> up to 1 bf/chunk
Page 127680 (0x0f960000) -> up to 1 bf/chunk
Page 129920 (0x0fdc0000) -> up to 1 bf/chunk
Maximum number of bitflips: 1
Pages with bitflips: 44/130752
It is also possible to reduce the output with the .quiet suffix in order
to just show the summary.
=> nand watch.chip.quiet
device 0 whole chip
size adjusted to 0xff60000 (5 bad blocks)
NAND watch for bitflips in area 0x0-0xff60000:
Maximum number of bitflips: 1
Pages with bitflips: 44/130752
Signed-off-by: Miquel Raynal <miquel.raynal at bootlin.com>
---
Hello, I recently came across a batch of NANDs with a lot of "natural"
bitflips so in order to easily and objectively characterize how
unstable these parts were, I wrote this little tool which was pretty
handy to have in U-Boot. I believe it can be useful for others as well,
so here is the patch.
Cheers, Miquèl
cmd/Kconfig | 5 ++
cmd/nand.c | 103 ++++++++++++++++++++++++++++++++++++++++
drivers/mtd/mtdcore.c | 22 +++++++++
include/linux/mtd/mtd.h | 1 +
4 files changed, 131 insertions(+)
diff --git a/cmd/Kconfig b/cmd/Kconfig
index 451baa3ecac..0524328d373 100644
--- a/cmd/Kconfig
+++ b/cmd/Kconfig
@@ -1384,6 +1384,11 @@ config CMD_NAND_TORTURE
help
NAND torture support.
+config CMD_NAND_WATCH
+ bool "nand watch"
+ help
+ NAND watch bitflip support.
+
endif # CMD_NAND
config CMD_NVME
diff --git a/cmd/nand.c b/cmd/nand.c
index 71b8f964429..3bf67f5b65e 100644
--- a/cmd/nand.c
+++ b/cmd/nand.c
@@ -231,6 +231,54 @@ free_dat:
return ret;
}
+#ifdef CONFIG_CMD_NAND_WATCH
+/**
+ * nand_watch_bf() - read an area page by page and report bitflips
+ * @mtd: device to read from
+ * @off: start offset in bytes; divided by the page size to get the first page
+ * @size: length in bytes; divided by the page size to get the page count
+ * @quiet: when true, per-page lines are suppressed and only the summary is
+ *         printed
+ *
+ * Each page is read through mtd_read_oob_bf(). A positive return value is
+ * taken as the bitflip count for that page and feeds both the running
+ * maximum and the "pages with bitflips" counter. A negative return value is
+ * printed as an error (unless @quiet) and is not reflected in the summary.
+ *
+ * Return: 1 if the page buffer cannot be allocated, 0 otherwise
+ */
+static int nand_watch_bf(struct mtd_info *mtd, ulong off, ulong size, bool quiet)
+{
+	unsigned int max_bf = 0, pages_wbf = 0;
+	unsigned int first_page, pages, i;
+	struct mtd_oob_ops ops = {};
+	u_char *buf;
+	int ret;
+
+	buf = memalign(ARCH_DMA_MINALIGN, mtd->writesize);
+	if (!buf) {
+		puts("No memory for page buffer\n");
+		return 1;
+	}
+
+	first_page = off / mtd->writesize;
+	pages = size / mtd->writesize;
+
+	/* One full page of main data per read, always into the same buffer */
+	ops.datbuf = buf;
+	ops.len = mtd->writesize;
+
+	for (i = first_page; i < first_page + pages; i++) {
+		/* Widen before multiplying so the byte address cannot wrap */
+		loff_t addr = (loff_t)mtd->writesize * i;
+
+		ret = mtd_read_oob_bf(mtd, addr, &ops);
+		if (ret < 0) {
+			if (!quiet)
+				printf("Page %7u (0x%08llx) -> error %d\n",
+				       i, (unsigned long long)addr, ret);
+			continue;
+		}
+
+		/* Zero means a clean page: nothing to count or print */
+		if (!ret)
+			continue;
+
+		max_bf = max(max_bf, (unsigned int)ret);
+		pages_wbf++;
+		if (!quiet)
+			printf("Page %7u (0x%08llx) -> up to %2d bf/chunk\n",
+			       i, (unsigned long long)addr, ret);
+	}
+
+	printf("Maximum number of bitflips: %u\n", max_bf);
+	printf("Pages with bitflips: %u/%u\n", pages_wbf, pages);
+
+	free(buf);
+
+	return 0;
+}
+#endif
+
/* ------------------------------------------------------------------------- */
static int set_dev(int dev)
@@ -778,6 +826,55 @@ static int do_nand(struct cmd_tbl *cmdtp, int flag, int argc,
return ret == 0 ? 0 : 1;
}
+#ifdef CONFIG_CMD_NAND_WATCH
+ if (strncmp(cmd, "watch", 5) == 0) {
+ int args = 2;
+
+ if (cmd[5]) {
+ if (!strncmp(&cmd[5], ".part", 5)) {
+ args = 1;
+ } else if (!strncmp(&cmd[5], ".chip", 5)) {
+ args = 0;
+ } else {
+ goto usage;
+ }
+ }
+
+ if (cmd[10])
+ if (!strncmp(&cmd[10], ".quiet", 6))
+ quiet = true;
+
+ if (argc != 2 + args)
+ goto usage;
+
+ ret = mtd_arg_off_size(argc - 2, argv + 2, &dev, &off, &size,
+ &maxsize, MTD_DEV_TYPE_NAND, mtd->size);
+ if (ret)
+ return ret;
+
+ /* size is unspecified */
+ if (argc < 4)
+ adjust_size_for_badblocks(&size, off, dev);
+
+ if ((off & (mtd->writesize - 1)) ||
+ (size & (mtd->writesize - 1))) {
+ printf("Attempt to read non page-aligned data\n");
+ return -EINVAL;
+ }
+
+ ret = set_dev(dev);
+ if (ret)
+ return ret;
+
+ mtd = get_nand_dev_by_index(dev);
+
+ printf("\nNAND watch for bitflips in area 0x%llx-0x%llx:\n",
+ off, off + size);
+
+ return nand_watch_bf(mtd, off, size, quiet);
+ }
+#endif
+
#ifdef CONFIG_CMD_NAND_TORTURE
if (strcmp(cmd, "torture") == 0) {
loff_t endoff;
@@ -943,6 +1040,12 @@ U_BOOT_LONGHELP(nand,
"nand erase.chip [clean] - erase entire chip'\n"
"nand bad - show bad blocks\n"
"nand dump[.oob] off - dump page\n"
+#ifdef CONFIG_CMD_NAND_WATCH
+ "nand watch <off> <size> - check an area for bitflips\n"
+ "nand watch.part <part> - check a partition for bitflips\n"
+ "nand watch.chip - check the whole device for bitflips\n"
+ "\t\t.quiet - Query only the summary, not the details\n"
+#endif
#ifdef CONFIG_CMD_NAND_TORTURE
"nand torture off - torture one block at offset\n"
"nand torture off [size] - torture blocks from off to off+size\n"
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index aa78d41a55e..2baf92a9056 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1126,6 +1126,28 @@ int mtd_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops)
}
EXPORT_SYMBOL_GPL(mtd_read_oob);
+/*
+ * Variant of mtd_read_oob() that hands the value returned by
+ * mtd->_read_oob() back to the caller instead of folding it into a
+ * status code.
+ *
+ * Per the mtd_read_oob() semantics this was copied from: when
+ * ops->datbuf != NULL, mtd->_read_oob() behaves like mtd->_read() and
+ * returns a non-negative integer representing max bitflips; in other
+ * cases it may return -EUCLEAN.
+ *
+ * Returns -EOPNOTSUPP when the device has no _read_oob hook, the negative
+ * error from the hook on failure, 0 when the device has no ECC
+ * (ecc_strength == 0), and the hook's return value otherwise.
+ */
+int mtd_read_oob_bf(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops)
+{
+	int bitflips;
+
+	/* Clear the returned lengths before anything else */
+	ops->oobretlen = 0;
+	ops->retlen = 0;
+
+	if (!mtd->_read_oob)
+		return -EOPNOTSUPP;
+
+	bitflips = mtd->_read_oob(mtd, from, ops);
+	if (unlikely(bitflips < 0))
+		return bitflips;
+
+	/* A device that lacks ecc has no bitflip count to report */
+	return mtd->ecc_strength ? bitflips : 0;
+}
+EXPORT_SYMBOL_GPL(mtd_read_oob_bf);
+
int mtd_write_oob(struct mtd_info *mtd, loff_t to,
struct mtd_oob_ops *ops)
{
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 09f52698877..28afbb86ea9 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -413,6 +413,7 @@ int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen,
const u_char *buf);
int mtd_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops);
+int mtd_read_oob_bf(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops);
int mtd_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops);
int mtd_get_fact_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen,
--
2.34.1
More information about the U-Boot
mailing list