[U-Boot] [PATCH] UBI: fix endless loop when a bitflip was detected

Tue Sep 13 11:07:13 CEST 2011

On km_kirkwood boards we saw that u-boot gets stuck if the
UBI layer detects a correctable bitflip in the NAND flash
when u-boot tries to attach to the UBI device.

This was already fixed in the UBI layer in the current
mainline linux. The original commit was:

"commit b86a2c56e512f46d140a4bcb4e35e8a7d4a99a4b
Author: Artem Bityutskiy <Artem.Bityutskiy at nokia.com>
Date:   Sun May 24 14:13:34 2009 +0300

UBI: do not switch to R/O mode on read errors

This patch improves UBI errors handling. ATM UBI switches to
R/O mode when the WL worker fails to read the source PEB.
This means that the upper layers (e.g., UBIFS) has no
chances to unmap the erroneous PEB and fix the error.
This patch changes this behaviour and makes UBI put PEBs
like this into a separate RB-tree, thus preventing the
WL worker from hitting the same read errors again and
again.

But there is a 10% limit on a maximum amount of PEBs like this.
If there are too much of them, UBI switches to R/O mode.

Additionally, this patch teaches UBI not to panic and
switch to R/O mode if after a PEB has been copied, the
target LEB cannot be read back. Instead, now UBI cancels
the operation and schedules the target PEB for torturing.

The error paths has been tested by ingecting errors
into 'ubi_eba_copy_leb()'."

This patch is the portation to u-boot with some changes. The
constants for errors used in the original patch were replaced
with numeric values as present in current ubi layer. The constant
"MOVE_TARGET_RD_ERR" was removed from the original patch because
the current u-boot version didn't evaluate this value.

Signed-off-by: Holger Brunck <holger.brunck at keymile.com>
cc: Stefan Roese <sr at denx.de>
---
 drivers/mtd/ubi/build.c |    9 +++++++++
 drivers/mtd/ubi/eba.c   |    8 ++++++--
 drivers/mtd/ubi/ubi.h   |   10 ++++++++--
 drivers/mtd/ubi/wl.c    |   43 +++++++++++++++++++++++++++++++++++++------
 4 files changed, 60 insertions(+), 10 deletions(-)

diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 3ea0e6c..95ce665 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -607,6 +607,15 @@ static int io_init(struct ubi_device *ubi)
 	}
 
 	/*
+	 * Set maximum amount of physical erroneous eraseblocks to be 10%.
+	 * Erroneous PEB are those which have read errors.
+	 */
+	ubi->max_erroneous = ubi->peb_count / 10;
+	if (ubi->max_erroneous < 16)
+		ubi->max_erroneous = 16;
+	dbg_msg("max_erroneous    %d", ubi->max_erroneous);
+
+	/*
 	 * It may happen that EC and VID headers are situated in one minimal
 	 * I/O unit. In this case we can only accept this UBI image in
 	 * read-only mode.
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index d523c94..82131b7 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -438,8 +438,9 @@ retry:
 				 * not implemented.
 				 */
 				if (err == UBI_IO_BAD_VID_HDR) {
-					ubi_warn("bad VID header at PEB %d, LEB"
-						 "%d:%d", pnum, vol_id, lnum);
+					ubi_warn("corrupted VID header at PEB "
+						 "%d, LEB %d:%d", pnum, vol_id,
+						 lnum);
 					err = -EBADMSG;
 				} else
 					ubi_ro_mode(ubi);
@@ -1053,6 +1054,9 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
 	if (err && err != UBI_IO_BITFLIPS) {
 		ubi_warn("error %d while reading data from PEB %d",
 			 err, from);
+		if (err == -EIO)
+			err = 2;
+
 		goto out_unlock_buf;
 	}
 
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index bf77a15..dbb00c3 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -293,14 +293,15 @@ struct ubi_wl_entry;
  * @alc_mutex: serializes "atomic LEB change" operations
  *
  * @used: RB-tree of used physical eraseblocks
+ * @erroneous: RB-tree of erroneous used physical eraseblocks
  * @free: RB-tree of free physical eraseblocks
  * @scrub: RB-tree of physical eraseblocks which need scrubbing
  * @prot: protection trees
  * @prot.pnum: protection tree indexed by physical eraseblock numbers
  * @prot.aec: protection tree indexed by absolute erase counter value
  * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from,
- *           @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works
- *           fields
+ * 	     @move_to, @move_to_put @erase_pending, @wl_scheduled, @works and
+ * 	     @erroneous_peb_count fields
  * @move_mutex: serializes eraseblock moves
  * @wl_scheduled: non-zero if the wear-leveling was scheduled
  * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any
@@ -320,6 +321,8 @@ struct ubi_wl_entry;
  * @peb_size: physical eraseblock size
  * @bad_peb_count: count of bad physical eraseblocks
  * @good_peb_count: count of good physical eraseblocks
+ * @erroneous_peb_count: count of erroneous physical eraseblocks in @erroneous
+ * @max_erroneous: maximum allowed amount of erroneous physical eraseblocks
  * @min_io_size: minimal input/output unit size of the underlying MTD device
  * @hdrs_min_io_size: minimal I/O unit size used for VID and EC headers
  * @ro_mode: if the UBI device is in read-only mode
@@ -376,6 +379,7 @@ struct ubi_device {
 
 	/* Wear-leveling unit's stuff */
 	struct rb_root used;
+	struct rb_root erroneous;
 	struct rb_root free;
 	struct rb_root scrub;
 	struct {
@@ -403,6 +407,8 @@ struct ubi_device {
 	int peb_size;
 	int bad_peb_count;
 	int good_peb_count;
+	int erroneous_peb_count;
+	int max_erroneous;
 	int min_io_size;
 	int hdrs_min_io_size;
 	int ro_mode;
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 2f9a5e3..e94b058 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -53,9 +53,9 @@
  * moving it for wear-leveling reasons.
  *
  * As it was said, for the UBI unit all physical eraseblocks are either "free"
- * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used
- * eraseblocks are kept in a set of different RB-trees: @wl->used,
- * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub.
+ * or "used". Free eraseblock are kept in the @wl->free RB-tree, while
+ * used eraseblocks are kept in @wl->used, @wl->erroneous, or @wl->scrub
+ * RB-trees, as well as (temporarily) in the @wl->pq queue.
  *
  * Note, in this implementation, we keep a small in-RAM object for each physical
  * eraseblock. This is surely not a scalable solution. But it appears to be good
@@ -160,6 +160,8 @@
  * corresponds to the @wl->free tree. The latter state is split up on several
  * sub-states:
  * o the WL movement is allowed (@wl->used tree);
+ * o the WL movement is disallowed (@wl->erroneous) becouse the PEB is
+ *   erroneous - e.g., there was a read error;
  * o the WL movement is temporarily prohibited (@wl->prot.pnum and
  * @wl->prot.aec trees);
  * o scrubbing is needed (@wl->scrub tree).
@@ -746,7 +748,7 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
 static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 				int cancel)
 {
-	int err, put = 0, scrubbing = 0, protect = 0;
+	int err, put = 0, scrubbing = 0, protect = 0, erroneous = 0;
 	struct ubi_wl_prot_entry *uninitialized_var(pe);
 	struct ubi_wl_entry *e1, *e2;
 	struct ubi_vid_hdr *vid_hdr;
@@ -857,6 +859,24 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 		if (err == 1)
 			goto out_not_moved;
 
+		if (err == 2) {
+			/*
+			 * An error happened while reading the source PEB. Do
+			 * not switch to R/O mode in this case, and give the
+			 * upper layers a possibility to recover from this,
+			 * e.g. by unmapping corresponding LEB. Instead, just
+			 * put thie PEB to the @ubi->erroneus list to prevent
+			 * UBI from trying to move the over and over again.
+			 */
+			if (ubi->erroneous_peb_count > ubi->max_erroneous) {
+				ubi_err("too many erroneous eraseblocks (%d)",
+					ubi->erroneous_peb_count);
+				goto out_error;
+			}
+			erroneous = 1;
+			goto out_not_moved;
+		}
+
 		/*
 		 * For some reason the LEB was not moved - it might be because
 		 * the volume is being deleted. We should prevent this PEB from
@@ -916,7 +936,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 out_not_moved:
 	ubi_free_vid_hdr(ubi, vid_hdr);
 	spin_lock(&ubi->wl_lock);
-	if (scrubbing)
+	if (erroneous) {
+		wl_tree_add(e1, &ubi->erroneous);
+		ubi->erroneous_peb_count += 1;
+	} else if (scrubbing)
 		wl_tree_add(e1, &ubi->scrub);
 	else
 		wl_tree_add(e1, &ubi->used);
@@ -1197,6 +1220,13 @@ retry:
 		} else if (in_wl_tree(e, &ubi->scrub)) {
 			paranoid_check_in_wl_tree(e, &ubi->scrub);
 			rb_erase(&e->rb, &ubi->scrub);
+		} else if (in_wl_tree(e, &ubi->erroneous)) {
+			paranoid_check_in_wl_tree(e, &ubi->erroneous);
+			rb_erase(&e->rb, &ubi->erroneous);
+			ubi->erroneous_peb_count -= 1;
+			ubi_assert(ubi->erroneous_peb_count >= 0);
+			/* Erronious PEBs should be tortured */
+			torture = 1;
 		} else {
 			err = prot_tree_del(ubi, e->pnum);
 			if (err) {
@@ -1446,7 +1476,7 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
 	struct ubi_wl_entry *e;
 
 
-	ubi->used = ubi->free = ubi->scrub = RB_ROOT;
+	ubi->used = ubi->erroneous = ubi->free = ubi->scrub = RB_ROOT;
 	ubi->prot.pnum = ubi->prot.aec = RB_ROOT;
 	spin_lock_init(&ubi->wl_lock);
 	mutex_init(&ubi->move_mutex);
@@ -1597,6 +1627,7 @@ void ubi_wl_close(struct ubi_device *ubi)
 	cancel_pending(ubi);
 	protection_trees_destroy(ubi);
 	tree_destroy(&ubi->used);
+	tree_destroy(&ubi->erroneous);
 	tree_destroy(&ubi->free);
 	tree_destroy(&ubi->scrub);
 	kfree(ubi->lookuptbl);
-- 
1.7.1