Add size tracing tool

This commit is contained in:
jaseg 2020-03-12 21:59:45 +01:00
parent 03e8443e2e
commit bf7e8701c7
5 changed files with 596 additions and 0 deletions

View file

@ -0,0 +1,126 @@
import sys
import pyparsing as pp
from pyparsing import pyparsing_common as ppc
LPAREN, RPAREN, LBRACE, RBRACE, LBROK, RBROK, COLON, SEMICOLON, EQUALS, COMMA = map(pp.Suppress, '(){}<>:;=,')
parse_suffix_int = lambda lit: int(lit[:-1]) * (10**(3*(1 + 'kmgtpe'.find(lit[-1].lower()))))
si_suffix = pp.oneOf('k m g t p e', caseless=True)
numeric_literal = pp.Regex('0x[0-9a-fA-F]+').setName('hex int').setParseAction(pp.tokenMap(int, 16)) \
| (pp.Regex('[0-9]+[kKmMgGtTpPeE]')).setName('size int').setParseAction(pp.tokenMap(parse_suffix_int)) \
| pp.Word(pp.nums).setName('int').setParseAction(pp.tokenMap(int))
access_def = pp.Regex('[rR]?[wW]?[xX]?').setName('access literal').setParseAction(pp.tokenMap(str.lower))
origin_expr = pp.Suppress(pp.CaselessKeyword('ORIGIN')) + EQUALS + numeric_literal
length_expr = pp.Suppress(pp.CaselessKeyword('LENGTH')) + EQUALS + numeric_literal
mem_expr = pp.Group(ppc.identifier + LPAREN + access_def + RPAREN + COLON + origin_expr + COMMA + length_expr)
mem_contents = pp.ZeroOrMore(mem_expr)
mem_toplevel = pp.CaselessKeyword("MEMORY") + pp.Group(LBRACE + pp.Optional(mem_contents, []) + RBRACE)
glob = pp.Word(pp.alphanums + '._*')
match_expr = pp.Forward()
assignment = pp.Forward()
funccall = pp.Group(pp.Word(pp.alphas + '_') + LPAREN + (assignment | numeric_literal | match_expr | glob | ppc.identifier) + RPAREN + pp.Optional(SEMICOLON))
value = numeric_literal | funccall | ppc.identifier | '.'
formula = (value + pp.oneOf('+ = * / %') + value) | value
# suppress stray semicolons
assignment << (SEMICOLON | pp.Group((ppc.identifier | '.') + EQUALS + (formula | value) + pp.Optional(SEMICOLON)))
match_expr << (glob + LPAREN + pp.OneOrMore(funccall | glob) + RPAREN)
section_contents = pp.ZeroOrMore(assignment | funccall | match_expr);
section_name = pp.Regex('\.[a-zA-Z0-9_.]+')
section_def = pp.Group(section_name + pp.Optional(numeric_literal) + COLON + LBRACE + pp.Group(section_contents) +
RBRACE + pp.Optional(RBROK + ppc.identifier + pp.Optional('AT' + RBROK + ppc.identifier)))
sec_contents = pp.ZeroOrMore(section_def | assignment)
sections_toplevel = pp.Group(pp.CaselessKeyword("SECTIONS").suppress() + LBRACE + sec_contents + RBRACE)
toplevel_elements = mem_toplevel | funccall | sections_toplevel | assignment
ldscript = pp.Group(pp.ZeroOrMore(toplevel_elements))
ldscript.ignore(pp.cppStyleComment)
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('linker_script', type=argparse.FileType('r'))
args = parser.parse_args()
#print(mem_expr.parseString('FLASH (rx) : ORIGIN = 0x0800000, LENGTH = 512K', parseAll=True))
# print(ldscript.parseString('''
# /* Entry Point */
# ENTRY(Reset_Handler)
#
# /* Highest address of the user mode stack */
# _estack = 0x20020000; /* end of RAM */
# /* Generate a link error if heap and stack don't fit into RAM */
# _Min_Heap_Size = 0x200;; /* required amount of heap */
# _Min_Stack_Size = 0x400;; /* required amount of stack */
# ''', parseAll=True))
print(ldscript.parseFile(args.linker_script, parseAll=True))
#print(funccall.parseString('KEEP(*(.isr_vector))'))
#print(section_contents.parseString('''
# . = ALIGN(4);
# KEEP(*(.isr_vector)) /* Startup code */
# . = ALIGN(4);
# ''', parseAll=True))
#print(section_def.parseString('''
# .text :
# {
# . = ALIGN(4);
# *(.text) /* .text sections (code) */
# *(.text*) /* .text* sections (code) */
# *(.glue_7) /* glue arm to thumb code */
# *(.glue_7t) /* glue thumb to arm code */
# *(.eh_frame)
#
# KEEP (*(.init))
# KEEP (*(.fini))
#
# . = ALIGN(4);
# _etext = .; /* define a global symbols at end of code */
# } >FLASH
# ''', parseAll=True))
#print(section_def.parseString('.ARM.extab : { *(.ARM.extab* .gnu.linkonce.armextab.*) } >FLASH', parseAll=True))
#print(assignment.parseString('__preinit_array_start = .', parseAll=True))
#print(assignment.parseString('a = 23', parseAll=True))
#print(funccall.parseString('foo (a=23)', parseAll=True))
#print(funccall.parseString('PROVIDE_HIDDEN (__preinit_array_start = .);', parseAll=True))
#print(section_def.parseString('''
# .preinit_array :
# {
# PROVIDE_HIDDEN (__preinit_array_start = .);
# KEEP (*(.preinit_array*))
# PROVIDE_HIDDEN (__preinit_array_end = .);
# } >FLASH''', parseAll=True))
#print(match_expr.parseString('*(SORT(.init_array.*))', parseAll=True))
#print(funccall.parseString('KEEP (*(SORT(.init_array.*)))', parseAll=True))
#print(section_def.parseString('''
# .init_array :
# {
# PROVIDE_HIDDEN (__init_array_start = .);
# KEEP (*(SORT(.init_array.*)))
# KEEP (*(.init_array*))
# PROVIDE_HIDDEN (__init_array_end = .);
# } >FLASH
# ''', parseAll=True))
#print(match_expr.parseString('*(.ARM.extab* .gnu.linkonce.armextab.*)', parseAll=True))
#print(formula.parseString('. + _Min_Heap_Size', parseAll=True))
#print(assignment.parseString('. = . + _Min_Heap_Size;', parseAll=True))
#print(sections_toplevel.parseString('''
# SECTIONS
# {
# .ARMattributes : { }
# }
# ''', parseAll=True))
#sys.exit(0)

View file

@ -0,0 +1,161 @@
import tempfile
import os
from os import path
import sys
import re
import subprocess
from contextlib import contextmanager
from collections import defaultdict
import cxxfilt
from elftools.elf.elffile import ELFFile
from elftools.elf.descriptions import describe_symbol_type
import libarchive
@contextmanager
def chdir(newdir):
old_cwd = os.getcwd()
try:
os.chdir(newdir)
yield
finally:
os.chdir(old_cwd)
def trace_source_files(linker, cmdline, trace_sections=[]):
with tempfile.TemporaryDirectory() as tempdir:
out_path = path.join(tempdir, 'output.elf')
output = subprocess.check_output([linker, '-o', out_path, f'-Wl,--cref', *cmdline])
lines = [ line.strip() for line in output.decode().splitlines() ]
# FIXME also find isr vector table references
defs = {}
for line in lines[lines.index('Cross Reference Table')+3:]:
try:
*left, right = line.split()
if left:
defs[' '.join(left)] = right
except:
pass
refs = defaultdict(lambda: set())
syms = {}
for sym, obj in defs.items():
fn, _, member = re.match('^([^()]+)(\((.+)\))?$', obj).groups()
fn = path.abspath(fn)
if member:
subprocess.check_call(['ar', 'x', '--output', tempdir, fn, member])
fn = path.join(tempdir, member)
with open(fn, 'rb') as f:
elf = ELFFile(f)
symtab = elf.get_section_by_name('.symtab')
symtab_demangled = { cxxfilt.demangle(nsym.name).replace(' ', ''): i
for i, nsym in enumerate(symtab.iter_symbols()) }
def lookup_size(name):
name_normalized = name.replace(' ', '')
if name_normalized in symtab_demangled:
entry = symtab.get_symbol(symtab_demangled[name_normalized])
return entry['st_size']
else:
return None
syms[sym] = fn, lookup_size(sym)
s = set()
sec_map = { sec.name: i for i, sec in enumerate(elf.iter_sections()) }
sec_name = f'.rel.text.{sym}'
matches = [ i for name, i in sec_map.items() if re.match(f'\.rel\..*\.{sym}', name) ]
if matches:
sec = elf.get_section(matches[0])
for reloc in sec.iter_relocations():
refsym = symtab.get_symbol(reloc['r_info_sym'])
s.add(refsym.name)
if refsym.name not in defs:
syms[refsym.name] = fn, lookup_size(refsym.name)
refs[sym] = s
for tsec in trace_sections:
matches = [ i for name, i in sec_map.items() if name == f'.rel{tsec}' ]
s = set()
if matches:
sec = elf.get_section(matches[0])
for reloc in sec.iter_relocations():
refsym = symtab.get_symbol(reloc['r_info_sym'])
s.add(refsym.name)
refs[tsec.replace('.', '_')] |= s
syms_out = set()
with open(out_path, 'rb') as f:
elf = ELFFile(f)
symtab = elf.get_section_by_name('.symtab')
for sym in symtab.iter_symbols():
if describe_symbol_type(sym['st_info']['type']) == 'FUNC':
syms_out.add(sym.name)
#for sym in defs:
# entry = symtab.get_symbol_by_name(sym)
# if entry is None:
# syms[sym] = defs[sym], None
# else:
# syms[sym] = defs[sym], entry[0]['st_size']
return syms, refs, syms_out
@contextmanager
def wrap(leader='', print=print, left='{', right='}'):
print(leader, left)
yield lambda *args, **kwargs: print(' ', *args, **kwargs)
print(right)
def mangle(name):
return re.sub('[^a-zA-Z0-9_]', '_', name)
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--trace-sections', type=str, action='append', default=[])
parser.add_argument('linker_binary')
parser.add_argument('linker_args', nargs=argparse.REMAINDER)
args = parser.parse_args()
trace_sections = args.trace_sections
trace_sections_mangled = { sec.replace('.', '_') for sec in trace_sections }
syms, refs, syms_out = trace_source_files(args.linker_binary, args.linker_args, trace_sections)
clusters = defaultdict(lambda: [])
for sym, (obj, size) in syms.items():
clusters[obj].append((sym, size))
obj_size = defaultdict(lambda: 0)
for name, (obj, size) in syms.items():
if size is not None:
obj_size[obj] += size
with wrap('digraph G', print) as lvl1print:
print('rankdir=LR;')
print()
for i, (obj, syms) in enumerate(clusters.items()):
with wrap(f'subgraph cluster_{i}', lvl1print) as lvl2print:
lvl2print(f'label = "{obj} <{obj_size[obj]}>";')
lvl2print()
for sym, size in syms:
if sym in syms_out:
lvl2print(f'{mangle(sym)}[label = "{sym} <{size}>"];')
lvl1print()
for start, ends in refs.items():
for end in ends:
if end and (start in syms_out or start in trace_sections_mangled) and end in syms_out:
lvl1print(f'{mangle(start)} -> {mangle(end)};')
for sec in trace_sections:
lvl1print(f'{sec.replace(".", "_")} [label = "section {sec}"];')

View file

@ -0,0 +1,62 @@
#!/usr/bin/env python3
def parse_linker_script(data):
pass
def link(groups):
defined_symbols = {}
undefined_symbols = set()
for group, files in groups:
while True:
found_something = False
for fn in files:
symbols = load_symbols(fn)
for symbol in symbols:
if symbol in defined_symbols:
if not group or not found_something:
break
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('-T', '--script', type=str, help='Linker script to use')
parser.add_argument('-o', '--output', type=str, help='Output file to produce')
args, rest = parser.parse_known_intermixed_args()
print(rest)
addprefix = lambda *xs: [ prefix + opt for opt in xs for prefix in ('', '-Wl,') ]
START_GROUP = addprefix('-(', '--start-group')
END_GROUP = addprefix('-)', '--end-group')
GROUP_OPTS = [*START_GROUP, *END_GROUP]
input_files = [ arg for arg in rest if not arg.startswith('-') or arg in GROUP_OPTS ]
def input_file_iter(input_files):
group = False
files = []
for arg in input_files:
if arg in START_GROUP:
assert not group
if files:
yield False, files # nested -Wl,--start-group
group, files = True, []
elif arg in END_GROUP:
assert group # missing -Wl,--start-group
if files:
yield True, files
group, files = False, []
else:
files.append(arg)
assert not group # missing -Wl,--end-group
if files:
yield False, files

View file

@ -0,0 +1,118 @@
#!/usr/bin/env python3
import re
import subprocess
import tempfile
import pprint
ARCHIVE_RE = r'([^(]*)(\([^)]*\))?'
def trace_source_files(linker, cmdline):
with tempfile.NamedTemporaryFile() as mapfile:
output = subprocess.check_output([linker, f'-Wl,--Map={mapfile.name}', *cmdline])
# intentionally use generator here
idx = 0
lines = [ line.rstrip() for line in mapfile.read().decode().splitlines() if line.strip() ]
for idx, line in enumerate(lines[idx:], start=idx):
#print('Dropping', line)
if line == 'Linker script and memory map':
break
idx += 1
objects = []
symbols = {}
sections = {}
current_object = None
last_offset = None
last_symbol = None
cont_sec = None
cont_ind = None
current_section = None
for idx, line in enumerate(lines[idx:], start=idx):
print(f'Processing >{line}')
if line.startswith('LOAD'):
_load, obj = line.split()
objects.append(obj)
continue
if line.startswith('OUTPUT'):
break
m = re.match(r'^( ?)([^ ]+)? +(0x[0-9a-z]+) +(0x[0-9a-z]+)?(.*)?$', line)
if m is None:
m = re.match(r'^( ?)([^ ]+)?$', line)
if m:
cont_ind, cont_sec = m.groups()
else:
cont_ind, cont_sec = None, None
last_offset, last_symbol = None, None
continue
indent, sec, offx, size, sym_or_src = m.groups()
if sec is None:
sec = cont_sec
ind = cont_ind
cont_sec = None
cont_ind = None
print(f'vals: indent={indent} sec={sec} offx={offx} size={size} sym_or_src={sym_or_src}')
if not re.match('^[a-zA-Z_0-9<>():*]+$', sym_or_src):
continue
if indent == '':
print(f'Section: {sec} 0x{size:x}')
current_section = sec
sections[sec] = size
last_offset = None
last_symbol = None
continue
if offx is not None:
offx = int(offx, 16)
if size is not None:
size = int(size, 16)
if size is not None and sym_or_src is not None:
# archive/object line
archive, _member = re.match(ARCHIVE_RE, sym_or_src).groups()
current_object = archive
last_offset = offx
else:
if sym_or_src is not None:
assert size is None
if last_offset is not None:
last_size = offx - last_offset
symbols[last_symbol] = (last_size, current_section)
print(f'Symbol: {last_symbol} 0x{last_size:x} @{current_section}')
last_offset = offx
last_symbol = sym_or_src
idx += 1
for idx, line in enumerate(lines[idx:], start=idx):
if line == 'Cross Reference Table':
break
idx += 1
# map which symbol was pulled from which object in the end
used_defs = {}
for line in lines:
*left, right = line.split()
archive, _member = re.match(ARCHIVE_RE, right).groups()
if left:
used_defs[''.join(left)] = archive
#pprint.pprint(symbols)
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('linker_binary')
parser.add_argument('linker_args', nargs=argparse.REMAINDER)
args = parser.parse_args()
source_files = trace_source_files(args.linker_binary, args.linker_args)

View file

@ -0,0 +1,129 @@
import re
from collections import defaultdict, namedtuple
Section = namedtuple('Section', ['name', 'offset', 'objects'])
ObjectEntry = namedtuple('ObjectEntry', ['filename', 'object', 'offset', 'size'])
FileEntry = namedtuple('FileEntry', ['section', 'object', 'offset', 'length'])
class Memory:
def __init__(self, name, origin, length, attrs=''):
self.name, self.origin, self.length, self.attrs = name, origin, length, attrs
self.sections = {}
self.files = defaultdict(lambda: [])
self.totals = defaultdict(lambda: 0)
def add_toplevel(self, name, offx, length):
self.sections[name] = Section(offx, length, [])
def add_obj(self, name, offx, length, fn, obj):
base_section, sep, subsec = name[1:].partition('.')
base_section = '.'+base_section
if base_section in self.sections:
sec = secname, secoffx, secobjs = self.sections[base_section]
secobjs.append(ObjectEntry(fn, obj, offx, length))
else:
sec = None
self.files[fn].append(FileEntry(sec, obj, offx, length))
self.totals[fn] += length
class MapFile:
def __init__(self, s):
self._lines = s.splitlines()
self.memcfg = {}
self.defaultmem = Memory('default', 0, 0xffffffffffffffff)
self._parse()
def __getitem__(self, offx_or_name):
''' Lookup a memory area by name or address '''
if offx_or_name in self.memcfg:
return self.memcfg[offx_or_name]
elif isinstance(offx_or_name, int):
for mem in self.memcfg.values():
if mem.origin <= offx_or_name < mem.origin+mem.length:
return mem
else:
return self.defaultmem
raise ValueError('Invalid argument type for indexing')
def _skip(self, regex):
matcher = re.compile(regex)
for l in self:
if matcher.match(l):
break
def __iter__(self):
while self._lines:
yield self._lines.pop(0)
def _parse(self):
self._skip('^Memory Configuration')
# Parse memory segmentation info
self._skip('^Name')
for l in self:
if not l:
break
name, origin, length, *attrs = l.split()
if not name.startswith('*'):
self.memcfg[name] = Memory(name, int(origin, 16), int(length, 16), attrs[0] if attrs else '')
# Parse section information
toplevel_m = re.compile('^(\.[a-zA-Z0-9_.]+)\s+(0x[0-9a-fA-F]+)\s+(0x[0-9a-fA-F]+)')
secondlevel_m = re.compile('^ (\.[a-zA-Z0-9_.]+)\s+(0x[0-9a-fA-F]+)\s+(0x[0-9a-fA-F]+)\s+(.*)$')
secondlevel_linebreak_m = re.compile('^ (\.[a-zA-Z0-9_.]+)\n')
filelike = re.compile('^(/?[^()]*\.[a-zA-Z0-9-_]+)(\(.*\))?')
linebreak_section = None
for l in self:
# Toplevel section
match = toplevel_m.match(l)
if match:
name, offx, length = match.groups()
offx, length = int(offx, 16), int(length, 16)
self[offx].add_toplevel(name, offx, length)
match = secondlevel_linebreak_m.match(l)
if match:
linebreak_section, = match.groups()
continue
if linebreak_section:
l = ' {} {}'.format(linebreak_section, l)
linebreak_section = None
# Second-level section
match = secondlevel_m.match(l)
if match:
name, offx, length, misc = match.groups()
match = filelike.match(misc)
if match:
fn, obj = match.groups()
obj = obj.strip('()') if obj else None
offx, length = int(offx, 16), int(length, 16)
self[offx].add_obj(name, offx, length, fn, obj)
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(description='Parser GCC map file')
parser.add_argument('mapfile', type=argparse.FileType('r'), help='The GCC .map file to parse')
parser.add_argument('-m', '--memory', type=str, help='The memory segments to print, comma-separated')
args = parser.parse_args()
mf = MapFile(args.mapfile.read())
args.mapfile.close()
mems = args.memory.split(',') if args.memory else mf.memcfg.keys()
for name in mems:
mem = mf.memcfg[name]
print('Symbols by file for memory', name)
for tot, fn in reversed(sorted( (tot, fn) for fn, tot in mem.totals.items() )):
print(' {:>8} {}'.format(tot, fn))
for length, offx, sec, obj in reversed(sorted(( (length, offx, sec, obj) for sec, obj, offx, length in
mem.files[fn] ), key=lambda e: e[0] )):
name = sec.name if sec else None
print(' {:>8} {:>#08x} {}'.format(length, offx, obj))
#print('{:>16} 0x{:016x} 0x{:016x} ({:>24}) {}'.format(name, origin, length, length, attrs))