[RFC PATCH 2/2] check_linker_lists: Enhance detection of alignment problems
Simon Glass
sjg at chromium.org
Tue May 26 00:26:32 CEST 2026
From: Simon Glass <simon.glass at canonical.com>
When linker-inserted padding breaks list integrity, pointer arithmetic
like (end - start) / sizeof(struct) produces garbage. Pointer subtraction
is an exact division, so GCC implements the division by the constant
struct size as a multiplication by a multiplicative inverse, which gives
the right result only when the byte difference is an exact multiple of
the struct size. With padding, outputs like
"Running -858993444 bloblist tests" appear instead of the correct count.
Enhance the linker list checking script to detect these problems by
adding symbol size tracking using nm -S. This enables:
1. Padding detection: Compare each symbol's size to the gap between it
and the next symbol. If gap > size, padding was inserted, breaking
contiguous array assumptions.
2. Pointer arithmetic bugs: Check whether the span between a list's
start and end markers (end - start) is a multiple of the struct size.
Signed-off-by: Simon Glass <simon.glass at canonical.com>
---
scripts/check_linker_lists.py | 127 +++++++++++++++++++++++++++-------
1 file changed, 102 insertions(+), 25 deletions(-)
diff --git a/scripts/check_linker_lists.py b/scripts/check_linker_lists.py
index 46ff4465989..30183a14dc7 100755
--- a/scripts/check_linker_lists.py
+++ b/scripts/check_linker_lists.py
@@ -15,7 +15,8 @@ Exit Codes:
0: Success - no alignment problems were found
1: Usage Error - the script was not called with the correct arguments
2: Execution Error - failed to run 'nm' or the ELF file was not found
- 3: Problem Found - an inconsistent gap was detected in at least one list
+ 3: Problem Found - inconsistent gap, padding, or pointer-arithmetic
+ mismatch detected in at least one list
"""
import sys
@@ -25,8 +26,15 @@ import argparse
from statistics import mode
from collections import defaultdict, namedtuple
+# Information about a symbol: address, size (from nm -S), and name
+Symbol = namedtuple('Symbol', ['address', 'size', 'name'])
+
# Information about the gap between two consecutive symbols
-Gap = namedtuple('Gap', ['gap', 'prev_sym', 'next_sym'])
+Gap = namedtuple('Gap', ['gap', 'prev_sym', 'next_sym', 'prev_size'])
+
+# Start and end marker addresses for a list
+Markers = namedtuple('Markers', ['start', 'end'])
+
# Holds all the analysis results from checking the lists
Results = namedtuple('Results', [
'total_problems', 'total_symbols', 'all_lines', 'max_name_len',
@@ -41,18 +49,26 @@ SIZE_COL = 17
# Matches a list entry symbol (the '_2_' infix marks the actual list elements,
# as opposed to the '_1' start and '_3' end markers).
ENTRY_PATTERN = re.compile(r'^_u_boot_list_\d+_(?P<base_name>\w+)_2_')
+# Matches the start ('_1') and end ('_3') markers of a list. The regex on its
+# own is ambiguous: an entry whose user-chosen name ends in '_1' or '_3' (e.g.
+# '_u_boot_list_2_driver_2_foo_1') is also accepted, with '_2_foo' folded into
+# base_name. Callers must reject any match whose base_name contains '_2_'
+# (see run_nm_and_get_lists()) — real list names never do.
+MARKER_PATTERN = re.compile(
+ r'^_u_boot_list_\d+_(?P<base_name>\w+)_(?P<marker>[13])$')
def eprint(*args, **kwargs):
"""Print to stderr"""
print(*args, file=sys.stderr, **kwargs)
-def check_single_list(name, symbols, max_name_len):
+def check_single_list(name, symbols, max_name_len, marker_info=None):
"""Check alignment for a single list and return its findings
Args:
name (str): The cleaned-up name of the list for display
- symbols (list): A list of (address, name) tuples, sorted by address
+ symbols (list): A list of Symbol tuples, sorted by address
max_name_len (int): The max length of list names for column formatting
+ marker_info (Markers): Optional namedtuple with start and end addresses
Returns:
tuple: (problem_count, list_of_output_lines)
@@ -62,9 +78,9 @@ def check_single_list(name, symbols, max_name_len):
gaps = []
for i in range(len(symbols) - 1):
- addr1, name1 = symbols[i]
- addr2, name2 = symbols[i + 1]
- gaps.append(Gap(gap=addr2 - addr1, prev_sym=name1, next_sym=name2))
+ sym1, sym2 = symbols[i], symbols[i + 1]
+ gaps.append(Gap(gap=sym2.address - sym1.address, prev_sym=sym1.name,
+ next_sym=sym2.name, prev_size=sym1.size))
expected_gap = mode(g.gap for g in gaps)
lines = [f'{name:<{max_name_len + NAME_PAD}} {len(symbols):>{SYM_COL}} '
@@ -76,19 +92,42 @@ def check_single_list(name, symbols, max_name_len):
problem_count += 1
lines.append(
f' - Bad gap (0x{g.gap:x}) before symbol: {g.next_sym}')
+ elif g.prev_size and g.gap > g.prev_size:
+ # Gap is larger than symbol size - padding was inserted
+ problem_count += 1
+ lines.append(
+ f' - Padding: gap 0x{g.gap:x} > size 0x{g.prev_size:x}'
+ f' before: {g.next_sym}')
+
+ # Check if start/end marker span is a multiple of the struct size. If not,
+ # pointer subtraction (end - start) will produce wrong results due to
+ # compiler optimisation using magic-number multiplication. Skip when
+ # expected_gap is zero to avoid ZeroDivisionError on degenerate input
+ # (two list entries sharing an address).
+ if marker_info and expected_gap:
+ total_span = marker_info.end - marker_info.start
+ remainder = total_span % expected_gap
+ if remainder:
+ problem_count += 1
+ lines.append(
+ f' - Pointer arithmetic bug: span 0x{total_span:x} is not a '
+ f'multiple of struct size 0x{expected_gap:x} '
+ f'(remainder: {remainder})')
return problem_count, lines
def run_nm_and_get_lists(elf_path):
- """Run 'nm' and parse the output to discover all linker lists
+ """Run 'nm -S' and parse the output to discover all linker lists
Args:
elf_path (str): The path to the ELF file to process
Returns:
- dict or None: A dictionary of discovered lists, or None on error
+ tuple or None: (lists_dict, markers_dict) or None on error
+ lists_dict: entries keyed by base_name
+ markers_dict: start/end marker addresses keyed by base_name
"""
- cmd = ['nm', '-n', elf_path]
+ cmd = ['nm', '-S', '-n', elf_path]
try:
proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
except FileNotFoundError:
@@ -103,29 +142,57 @@ def run_nm_and_get_lists(elf_path):
return None
lists = defaultdict(list)
+ markers = defaultdict(dict) # {base_name: {'start': addr, 'end': addr}}
+
for line in proc.stdout.splitlines():
if '_u_boot_list_' not in line:
continue
try:
+ # 'nm -S' formats: '<addr> <size> <type> <name>' for sized symbols
+ # and '<addr> <type> <name>' for unsized (e.g. linker-defined
+ # start/end markers). Size defaults to 0 when absent.
parts = line.strip().split()
- address, name = int(parts[0], 16), parts[-1]
+ address = int(parts[0], 16)
+ name = parts[-1]
+ size = int(parts[1], 16) if len(parts) == 4 else 0
+
match = ENTRY_PATTERN.match(name)
if match:
- lists[match.group('base_name')].append((address, name))
+ lists[match.group('base_name')].append(
+ Symbol(address, size, name))
+ continue
+
+ # Reject marker matches where '_2_' leaked into base_name; those
+ # are entries the regex accepted via backtracking (see comment on
+ # MARKER_PATTERN). Real list names never contain '_2_'.
+ match = MARKER_PATTERN.match(name)
+ if match and '_2_' not in match.group('base_name'):
+ base_name = match.group('base_name')
+ key = 'start' if match.group('marker') == '1' else 'end'
+ markers[base_name][key] = address
except (ValueError, IndexError):
eprint(f'Warning: Could not parse line: {line}')
- return lists
+ # Convert marker dicts to Markers namedtuples (only if both start/end exist)
+ marker_tuples = {n: Markers(m['start'], m['end'])
+ for n, m in markers.items()
+ if 'start' in m and 'end' in m}
-def collect_data(lists):
+ return lists, marker_tuples
+
+def collect_data(lists, markers):
"""Collect alignment check data for all lists
Args:
lists (dict): A dictionary of lists and their symbols
+ markers (dict): A dictionary of start/end marker addresses per list
Returns:
Results: A namedtuple containing the analysis results
"""
+ if markers is None:
+ markers = {}
+
max_name_len = max((len(n) for n in lists), default=0)
total_problems = 0
@@ -134,8 +201,8 @@ def collect_data(lists):
for list_name in sorted(lists):
symbols = lists[list_name]
total_symbols += len(symbols)
- problem_count, lines = check_single_list(list_name, symbols,
- max_name_len)
+ problem_count, lines = check_single_list(
+ list_name, symbols, max_name_len, markers.get(list_name))
total_problems += problem_count
all_lines.extend(lines)
@@ -177,13 +244,22 @@ def main():
"""Main entry point of the script, returns an exit code"""
epilog_text = '''
Auto-discover all linker-generated lists in a U-Boot ELF file
-(e.g., for drivers, commands, etc.) and verify their integrity. Check
-that all elements in a given list are separated by a consistent number of
-bytes.
+(e.g., for drivers, commands, etc.) and verify their integrity.
+
+Problems detected (cause build failure):
+
+1. Inconsistent gaps: Elements in a list should all be separated by the same
+ number of bytes (the struct size). If the linker inserts padding between
+ some elements but not others, this is detected and reported.
+
+2. Padding detection: Using symbol sizes from nm -S, the script compares each
+ symbol's size to the gap after it. If gap > size, the linker inserted
+ padding, which breaks U-Boot's assumption that the list is a contiguous
+ array of same-sized structs.
-Problems typically indicate that the linker has inserted alignment padding
-between two elements in a list, which can break U-Boot's assumption that the
-list is a simple, contiguous array of same-sized structs.
+3. Pointer arithmetic bugs: Each list has start (_1) and end (_3) markers.
+ If the span (end - start) is not a multiple of struct size, pointer
+ subtraction produces garbage due to GCC's magic-number division.
'''
parser = argparse.ArgumentParser(
description='Check alignment of U-Boot linker lists in an ELF file.',
@@ -197,16 +273,17 @@ list is a simple, contiguous array of same-sized structs.
args = parser.parse_args()
- lists = run_nm_and_get_lists(args.elf_path)
- if lists is None:
+ result = run_nm_and_get_lists(args.elf_path)
+ if result is None:
return 2 # Error running nm
+ lists, markers = result
if not lists:
if args.verbose:
eprint('Success: No U-Boot linker lists found to check')
return 0
- results = collect_data(lists)
+ results = collect_data(lists, markers)
show_output(results, args.verbose)
return 3 if results.total_problems > 0 else 0
--
2.43.0
More information about the U-Boot
mailing list