[RFC PATCH 2/2] check_linker_lists: Enhance detection of alignment problems

Tue May 26 00:26:32 CEST 2026

From: Simon Glass <simon.glass at canonical.com>

When linker-inserted padding breaks list integrity, pointer arithmetic
like (end - start) / sizeof(struct) produces garbage. GCC optimizes
this division by a constant using a multiplicative inverse, which only
gives the right answer when the dividend is an exact multiple of the
divisor. With padding, outputs like "Running -858993444 bloblist tests"
appear instead of the correct count.

Enhance the linker list checking script to detect these problems by
adding symbol size tracking using nm -S. This enables:

1. Padding detection: Compare each symbol's size to the gap between it
   and the next symbol. If gap > size, padding was inserted, breaking
   contiguous array assumptions.

2. Pointer arithmetic bugs: Check that the span between the start and
   end markers (end - start) is a multiple of the struct size, taken as
   the most common gap between consecutive entries.

Signed-off-by: Simon Glass <simon.glass at canonical.com>
---

 scripts/check_linker_lists.py | 127 +++++++++++++++++++++++++++-------
 1 file changed, 102 insertions(+), 25 deletions(-)

diff --git a/scripts/check_linker_lists.py b/scripts/check_linker_lists.py
index 46ff4465989..30183a14dc7 100755
--- a/scripts/check_linker_lists.py
+++ b/scripts/check_linker_lists.py
@@ -15,7 +15,8 @@ Exit Codes:
   0: Success - no alignment problems were found
   1: Usage Error - the script was not called with the correct arguments
   2: Execution Error - failed to run 'nm' or the ELF file was not found
-  3: Problem Found - an inconsistent gap was detected in at least one list
+  3: Problem Found - inconsistent gap, padding, or pointer-arithmetic
+                     mismatch detected in at least one list
 """
 
 import sys
@@ -25,8 +26,15 @@ import argparse
 from statistics import mode
 from collections import defaultdict, namedtuple
 
+# Information about a symbol: address, size (from nm -S), and name
+Symbol = namedtuple('Symbol', ['address', 'size', 'name'])
+
 # Information about the gap between two consecutive symbols
-Gap = namedtuple('Gap', ['gap', 'prev_sym', 'next_sym'])
+Gap = namedtuple('Gap', ['gap', 'prev_sym', 'next_sym', 'prev_size'])
+
+# Start and end marker addresses for a list
+Markers = namedtuple('Markers', ['start', 'end'])
+
 # Holds all the analysis results from checking the lists
 Results = namedtuple('Results', [
     'total_problems', 'total_symbols', 'all_lines', 'max_name_len',
@@ -41,18 +49,26 @@ SIZE_COL = 17
 # Matches a list entry symbol (the '_2_' infix marks the actual list elements,
 # as opposed to the '_1' start and '_3' end markers).
 ENTRY_PATTERN = re.compile(r'^_u_boot_list_\d+_(?P<base_name>\w+)_2_')
+# Matches the start ('_1') and end ('_3') markers of a list. The regex on its
+# own is ambiguous: an entry whose user-chosen name ends in '_1' or '_3' (e.g.
+# '_u_boot_list_2_driver_2_foo_1') is also accepted, with '_2_foo' folded into
+# base_name. Callers must reject any match whose base_name contains '_2_'
+# (see run_nm_and_get_lists()) — real list names never do.
+MARKER_PATTERN = re.compile(
+    r'^_u_boot_list_\d+_(?P<base_name>\w+)_(?P<marker>[13])$')
 
 def eprint(*args, **kwargs):
     """Print to stderr"""
     print(*args, file=sys.stderr, **kwargs)
 
-def check_single_list(name, symbols, max_name_len):
+def check_single_list(name, symbols, max_name_len, marker_info=None):
     """Check alignment for a single list and return its findings
 
     Args:
         name (str): The cleaned-up name of the list for display
-        symbols (list): A list of (address, name) tuples, sorted by address
+        symbols (list): A list of Symbol tuples, sorted by address
         max_name_len (int): The max length of list names for column formatting
+        marker_info (Markers): Optional namedtuple with start and end addresses
 
     Returns:
         tuple: (problem_count, list_of_output_lines)
@@ -62,9 +78,9 @@ def check_single_list(name, symbols, max_name_len):
 
     gaps = []
     for i in range(len(symbols) - 1):
-        addr1, name1 = symbols[i]
-        addr2, name2 = symbols[i + 1]
-        gaps.append(Gap(gap=addr2 - addr1, prev_sym=name1, next_sym=name2))
+        sym1, sym2 = symbols[i], symbols[i + 1]
+        gaps.append(Gap(gap=sym2.address - sym1.address, prev_sym=sym1.name,
+                        next_sym=sym2.name, prev_size=sym1.size))
 
     expected_gap = mode(g.gap for g in gaps)
     lines = [f'{name:<{max_name_len + NAME_PAD}}  {len(symbols):>{SYM_COL}}  '
@@ -76,19 +92,42 @@ def check_single_list(name, symbols, max_name_len):
             problem_count += 1
             lines.append(
                 f'  - Bad gap (0x{g.gap:x}) before symbol: {g.next_sym}')
+        elif g.prev_size and g.gap > g.prev_size:
+            # Gap is larger than symbol size - padding was inserted
+            problem_count += 1
+            lines.append(
+                f'  - Padding: gap 0x{g.gap:x} > size 0x{g.prev_size:x}'
+                f' before: {g.next_sym}')
+
+    # Check if start/end marker span is a multiple of the struct size. If not,
+    # pointer subtraction (end - start) will produce wrong results due to
+    # compiler optimisation using magic-number multiplication. Skip when
+    # expected_gap is zero to avoid ZeroDivisionError on degenerate input
+    # (two list entries sharing an address).
+    if marker_info and expected_gap:
+        total_span = marker_info.end - marker_info.start
+        remainder = total_span % expected_gap
+        if remainder:
+            problem_count += 1
+            lines.append(
+                f'  - Pointer arithmetic bug: span 0x{total_span:x} is not a '
+                f'multiple of struct size 0x{expected_gap:x} '
+                f'(remainder: {remainder})')
 
     return problem_count, lines
 
 def run_nm_and_get_lists(elf_path):
-    """Run 'nm' and parse the output to discover all linker lists
+    """Run 'nm -S' and parse the output to discover all linker lists
 
     Args:
         elf_path (str): The path to the ELF file to process
 
     Returns:
-        dict or None: A dictionary of discovered lists, or None on error
+        tuple or None: (lists_dict, markers_dict) or None on error
+            lists_dict: entries keyed by base_name
+            markers_dict: start/end marker addresses keyed by base_name
     """
-    cmd = ['nm', '-n', elf_path]
+    cmd = ['nm', '-S', '-n', elf_path]
     try:
         proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
     except FileNotFoundError:
@@ -103,29 +142,57 @@ def run_nm_and_get_lists(elf_path):
         return None
 
     lists = defaultdict(list)
+    markers = defaultdict(dict)  # {base_name: {'start': addr, 'end': addr}}
+
     for line in proc.stdout.splitlines():
         if '_u_boot_list_' not in line:
             continue
         try:
+            # 'nm -S' formats: '<addr> <size> <type> <name>' for sized symbols
+            # and '<addr> <type> <name>' for unsized (e.g. linker-defined
+            # start/end markers). Size defaults to 0 when absent.
             parts = line.strip().split()
-            address, name = int(parts[0], 16), parts[-1]
+            address = int(parts[0], 16)
+            name = parts[-1]
+            size = int(parts[1], 16) if len(parts) == 4 else 0
+
             match = ENTRY_PATTERN.match(name)
             if match:
-                lists[match.group('base_name')].append((address, name))
+                lists[match.group('base_name')].append(
+                    Symbol(address, size, name))
+                continue
+
+            # Reject marker matches where '_2_' leaked into base_name; those
+            # are entries the regex accepted via backtracking (see comment on
+            # MARKER_PATTERN). Real list names never contain '_2_'.
+            match = MARKER_PATTERN.match(name)
+            if match and '_2_' not in match.group('base_name'):
+                base_name = match.group('base_name')
+                key = 'start' if match.group('marker') == '1' else 'end'
+                markers[base_name][key] = address
         except (ValueError, IndexError):
             eprint(f'Warning: Could not parse line: {line}')
 
-    return lists
+    # Convert marker dicts to Markers namedtuples (only if both start/end exist)
+    marker_tuples = {n: Markers(m['start'], m['end'])
+                     for n, m in markers.items()
+                     if 'start' in m and 'end' in m}
 
-def collect_data(lists):
+    return lists, marker_tuples
+
+def collect_data(lists, markers):
     """Collect alignment check data for all lists
 
     Args:
         lists (dict): A dictionary of lists and their symbols
+        markers (dict): A dictionary of start/end marker addresses per list
 
     Returns:
         Results: A namedtuple containing the analysis results
     """
+    if markers is None:
+        markers = {}
+
     max_name_len = max((len(n) for n in lists), default=0)
 
     total_problems = 0
@@ -134,8 +201,8 @@ def collect_data(lists):
     for list_name in sorted(lists):
         symbols = lists[list_name]
         total_symbols += len(symbols)
-        problem_count, lines = check_single_list(list_name, symbols,
-                                                 max_name_len)
+        problem_count, lines = check_single_list(
+            list_name, symbols, max_name_len, markers.get(list_name))
         total_problems += problem_count
         all_lines.extend(lines)
 
@@ -177,13 +244,22 @@ def main():
     """Main entry point of the script, returns an exit code"""
     epilog_text = '''
 Auto-discover all linker-generated lists in a U-Boot ELF file
-(e.g., for drivers, commands, etc.) and verify their integrity. Check
-that all elements in a given list are separated by a consistent number of
-bytes.
+(e.g., for drivers, commands, etc.) and verify their integrity.
+
+Problems detected (cause build failure):
+
+1. Inconsistent gaps: Elements in a list should all be separated by the same
+   number of bytes (the struct size). If the linker inserts padding between
+   some elements but not others, this is detected and reported.
+
+2. Padding detection: Using symbol sizes from nm -S, the script compares each
+   symbol's size to the gap after it. If gap > size, the linker inserted
+   padding, which breaks U-Boot's assumption that the list is a contiguous
+   array of same-sized structs.
 
-Problems typically indicate that the linker has inserted alignment padding
-between two elements in a list, which can break U-Boot's assumption that the
-list is a simple, contiguous array of same-sized structs.
+3. Pointer arithmetic bugs: Each list has start (_1) and end (_3) markers.
+   If the span (end - start) is not a multiple of struct size, pointer
+   subtraction produces garbage due to GCC's magic-number division.
 '''
     parser = argparse.ArgumentParser(
         description='Check alignment of U-Boot linker lists in an ELF file.',
@@ -197,16 +273,17 @@ list is a simple, contiguous array of same-sized structs.
 
     args = parser.parse_args()
 
-    lists = run_nm_and_get_lists(args.elf_path)
-    if lists is None:
+    result = run_nm_and_get_lists(args.elf_path)
+    if result is None:
         return 2  # Error running nm
 
+    lists, markers = result
     if not lists:
         if args.verbose:
             eprint('Success: No U-Boot linker lists found to check')
         return 0
 
-    results = collect_data(lists)
+    results = collect_data(lists, markers)
     show_output(results, args.verbose)
 
     return 3 if results.total_problems > 0 else 0
-- 
2.43.0