Add size tracing tool

2020-03-12 21:59:45 +01:00 · 2020-03-12 21:59:45 +01:00 · bf7e8701c7
commit bf7e8701c7
parent 03e8443e2e
5 changed files with 596 additions and 0 deletions
--- a/controller/fw/tools/ldparser.py
+++ b/controller/fw/tools/ldparser.py
@ -0,0 +1,126 @@
+
+import sys
+
+import pyparsing as pp
+from pyparsing import pyparsing_common as ppc
+
+LPAREN, RPAREN, LBRACE, RBRACE, LBROK, RBROK, COLON, SEMICOLON, EQUALS, COMMA = map(pp.Suppress, '(){}<>:;=,')
+
+parse_suffix_int = lambda lit: int(lit[:-1]) * (10**(3*(1 + 'kmgtpe'.find(lit[-1].lower()))))
+si_suffix = pp.oneOf('k m g t p e', caseless=True)
+
+numeric_literal = pp.Regex('0x[0-9a-fA-F]+').setName('hex int').setParseAction(pp.tokenMap(int, 16)) \
+        | (pp.Regex('[0-9]+[kKmMgGtTpPeE]')).setName('size int').setParseAction(pp.tokenMap(parse_suffix_int)) \
+        | pp.Word(pp.nums).setName('int').setParseAction(pp.tokenMap(int))
+access_def = pp.Regex('[rR]?[wW]?[xX]?').setName('access literal').setParseAction(pp.tokenMap(str.lower))
+
+origin_expr = pp.Suppress(pp.CaselessKeyword('ORIGIN')) + EQUALS + numeric_literal
+length_expr = pp.Suppress(pp.CaselessKeyword('LENGTH')) + EQUALS + numeric_literal
+mem_expr = pp.Group(ppc.identifier + LPAREN + access_def + RPAREN + COLON + origin_expr + COMMA + length_expr)
+mem_contents = pp.ZeroOrMore(mem_expr)
+
+mem_toplevel = pp.CaselessKeyword("MEMORY") + pp.Group(LBRACE + pp.Optional(mem_contents, []) + RBRACE)
+
+glob = pp.Word(pp.alphanums + '._*')
+match_expr = pp.Forward()
+assignment = pp.Forward()
+funccall = pp.Group(pp.Word(pp.alphas + '_') + LPAREN + (assignment | numeric_literal | match_expr | glob | ppc.identifier) + RPAREN + pp.Optional(SEMICOLON))
+value = numeric_literal | funccall | ppc.identifier | '.'
+formula = (value + pp.oneOf('+ = * / %') + value) | value
+# suppress stray semicolons
+assignment << (SEMICOLON | pp.Group((ppc.identifier | '.') + EQUALS + (formula | value) + pp.Optional(SEMICOLON)))
+match_expr << (glob + LPAREN + pp.OneOrMore(funccall | glob) + RPAREN)
+
+section_contents = pp.ZeroOrMore(assignment | funccall | match_expr);
+
+section_name = pp.Regex('\.[a-zA-Z0-9_.]+')
+section_def = pp.Group(section_name + pp.Optional(numeric_literal) + COLON + LBRACE + pp.Group(section_contents) +
+        RBRACE + pp.Optional(RBROK + ppc.identifier + pp.Optional('AT' + RBROK + ppc.identifier)))
+sec_contents = pp.ZeroOrMore(section_def | assignment)
+
+sections_toplevel = pp.Group(pp.CaselessKeyword("SECTIONS").suppress() + LBRACE + sec_contents + RBRACE)
+
+toplevel_elements = mem_toplevel | funccall | sections_toplevel | assignment
+ldscript = pp.Group(pp.ZeroOrMore(toplevel_elements))
+ldscript.ignore(pp.cppStyleComment)
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('linker_script', type=argparse.FileType('r'))
+    args = parser.parse_args()
+
+    #print(mem_expr.parseString('FLASH (rx) : ORIGIN = 0x0800000, LENGTH = 512K', parseAll=True))
+    # print(ldscript.parseString('''
+    #     /* Entry Point */
+    #     ENTRY(Reset_Handler)
+    #
+    #     /* Highest address of the user mode stack */
+    #     _estack = 0x20020000;    /* end of RAM */
+    #     /* Generate a link error if heap and stack don't fit into RAM */
+    #     _Min_Heap_Size = 0x200;;      /* required amount of heap  */
+    #     _Min_Stack_Size = 0x400;; /* required amount of stack */
+    #     ''', parseAll=True))
+
+    print(ldscript.parseFile(args.linker_script, parseAll=True))
+    #print(funccall.parseString('KEEP(*(.isr_vector))'))
+    #print(section_contents.parseString('''
+    #        . = ALIGN(4);
+    #        KEEP(*(.isr_vector)) /* Startup code */
+    #        . = ALIGN(4);
+    #        ''', parseAll=True))
+
+    #print(section_def.parseString('''
+    #      .text :
+    #      {
+    #        . = ALIGN(4);
+    #        *(.text)           /* .text sections (code) */
+    #        *(.text*)          /* .text* sections (code) */
+    #        *(.glue_7)         /* glue arm to thumb code */
+    #        *(.glue_7t)        /* glue thumb to arm code */
+    #        *(.eh_frame)
+    #
+    #        KEEP (*(.init))
+    #        KEEP (*(.fini))
+    #
+    #        . = ALIGN(4);
+    #        _etext = .;        /* define a global symbols at end of code */
+    #      } >FLASH
+    #      ''', parseAll=True))
+
+    #print(section_def.parseString('.ARM.extab   : { *(.ARM.extab* .gnu.linkonce.armextab.*) } >FLASH', parseAll=True))
+
+    #print(assignment.parseString('__preinit_array_start = .', parseAll=True))
+    #print(assignment.parseString('a = 23', parseAll=True))
+    #print(funccall.parseString('foo (a=23)', parseAll=True))
+    #print(funccall.parseString('PROVIDE_HIDDEN (__preinit_array_start = .);', parseAll=True))
+    #print(section_def.parseString('''
+    #      .preinit_array     :
+    #      {
+    #        PROVIDE_HIDDEN (__preinit_array_start = .);
+    #        KEEP (*(.preinit_array*))
+    #        PROVIDE_HIDDEN (__preinit_array_end = .);
+    #        } >FLASH''', parseAll=True))
+    #print(match_expr.parseString('*(SORT(.init_array.*))', parseAll=True))
+    #print(funccall.parseString('KEEP (*(SORT(.init_array.*)))', parseAll=True))
+    #print(section_def.parseString('''
+    #      .init_array :
+    #      {
+    #        PROVIDE_HIDDEN (__init_array_start = .);
+    #        KEEP (*(SORT(.init_array.*)))
+    #        KEEP (*(.init_array*))
+    #        PROVIDE_HIDDEN (__init_array_end = .);
+    #      } >FLASH
+    #      ''', parseAll=True))
+
+    #print(match_expr.parseString('*(.ARM.extab* .gnu.linkonce.armextab.*)', parseAll=True))
+    #print(formula.parseString('. + _Min_Heap_Size', parseAll=True))
+    #print(assignment.parseString('. = . + _Min_Heap_Size;', parseAll=True))
+    #print(sections_toplevel.parseString('''
+    #    SECTIONS
+    #    {
+    #      .ARMattributes : {  }
+    #    }
+    #      ''', parseAll=True))
+    #sys.exit(0)
+
--- a/controller/fw/tools/linkmem.py
+++ b/controller/fw/tools/linkmem.py
@ -0,0 +1,161 @@
+
+import tempfile
+import os
+from os import path
+import sys
+import re
+import subprocess
+from contextlib import contextmanager
+from collections import defaultdict
+
+import cxxfilt
+
+from elftools.elf.elffile import ELFFile
+from elftools.elf.descriptions import describe_symbol_type
+import libarchive
+
+@contextmanager
+def chdir(newdir):
+    old_cwd = os.getcwd()
+    try:
+        os.chdir(newdir)
+        yield
+    finally:
+        os.chdir(old_cwd)
+
+
+def trace_source_files(linker, cmdline, trace_sections=[]):
+    with tempfile.TemporaryDirectory() as tempdir:
+        out_path = path.join(tempdir, 'output.elf')
+        output = subprocess.check_output([linker, '-o', out_path, f'-Wl,--cref', *cmdline])
+        lines = [ line.strip() for line in output.decode().splitlines() ]
+        # FIXME also find isr vector table references
+
+        defs = {}
+        for line in lines[lines.index('Cross Reference Table')+3:]:
+            try:
+                *left, right = line.split()
+                if left:
+                    defs[' '.join(left)] = right 
+            except:
+                pass
+
+        refs = defaultdict(lambda: set())
+        syms = {}
+        for sym, obj in defs.items():
+            fn, _, member = re.match('^([^()]+)(\((.+)\))?$', obj).groups()
+            fn = path.abspath(fn)
+
+            if member:
+                subprocess.check_call(['ar', 'x', '--output', tempdir, fn, member])
+                fn = path.join(tempdir, member)
+
+            with open(fn, 'rb') as f:
+                elf = ELFFile(f)
+
+                symtab = elf.get_section_by_name('.symtab')
+                
+                symtab_demangled = { cxxfilt.demangle(nsym.name).replace(' ', ''): i
+                        for i, nsym in enumerate(symtab.iter_symbols()) }
+
+                def lookup_size(name):
+                    name_normalized = name.replace(' ', '')
+                    if name_normalized in symtab_demangled:
+                        entry = symtab.get_symbol(symtab_demangled[name_normalized])
+                        return entry['st_size']
+                    else:
+                        return None
+
+                syms[sym] = fn, lookup_size(sym)
+
+                s = set()
+                sec_map = { sec.name: i for i, sec in enumerate(elf.iter_sections()) }
+                sec_name = f'.rel.text.{sym}'
+                matches = [ i for name, i in sec_map.items() if re.match(f'\.rel\..*\.{sym}', name) ]
+                if matches:
+                    sec = elf.get_section(matches[0])
+                    for reloc in sec.iter_relocations():
+                        refsym = symtab.get_symbol(reloc['r_info_sym'])
+                        s.add(refsym.name)
+                        
+                        if refsym.name not in defs:
+                            syms[refsym.name] = fn, lookup_size(refsym.name)
+                refs[sym] = s
+
+                for tsec in trace_sections:
+                    matches = [ i for name, i in sec_map.items() if name == f'.rel{tsec}' ]
+                    s = set()
+                    if matches:
+                        sec = elf.get_section(matches[0])
+                        for reloc in sec.iter_relocations():
+                            refsym = symtab.get_symbol(reloc['r_info_sym'])
+                            s.add(refsym.name)
+                    refs[tsec.replace('.', '_')] |= s
+
+        syms_out = set()
+        with open(out_path, 'rb') as f:
+            elf = ELFFile(f)
+            symtab = elf.get_section_by_name('.symtab')
+            for sym in symtab.iter_symbols():
+                if describe_symbol_type(sym['st_info']['type']) == 'FUNC':
+                    syms_out.add(sym.name)
+            #for sym in defs:
+            #    entry = symtab.get_symbol_by_name(sym)
+            #    if entry is None:
+            #        syms[sym] = defs[sym], None
+            #    else:
+            #        syms[sym] = defs[sym], entry[0]['st_size']
+
+        return syms, refs, syms_out
+
+@contextmanager
+def wrap(leader='', print=print, left='{', right='}'):
+    print(leader, left)
+    yield lambda *args, **kwargs: print('   ', *args, **kwargs)
+    print(right)
+
+def mangle(name):
+    return re.sub('[^a-zA-Z0-9_]', '_', name)
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--trace-sections', type=str, action='append', default=[])
+    parser.add_argument('linker_binary')
+    parser.add_argument('linker_args', nargs=argparse.REMAINDER)
+    args = parser.parse_args()
+
+    trace_sections = args.trace_sections
+    trace_sections_mangled = { sec.replace('.', '_') for sec in trace_sections }
+    syms, refs, syms_out = trace_source_files(args.linker_binary, args.linker_args, trace_sections)
+
+    clusters = defaultdict(lambda: [])
+    for sym, (obj, size) in syms.items():
+        clusters[obj].append((sym, size))
+
+    obj_size = defaultdict(lambda: 0)
+    for name, (obj, size) in syms.items():
+        if size is not None:
+            obj_size[obj] += size
+
+    with wrap('digraph G', print) as lvl1print:
+        print('rankdir=LR;')
+        print()
+
+        for i, (obj, syms) in enumerate(clusters.items()):
+            with wrap(f'subgraph cluster_{i}', lvl1print) as lvl2print:
+                lvl2print(f'label = "{obj} <{obj_size[obj]}>";')
+                lvl2print()
+                for sym, size in syms:
+                    if sym in syms_out:
+                        lvl2print(f'{mangle(sym)}[label = "{sym} <{size}>"];')
+            lvl1print()
+
+        for start, ends in refs.items():
+            for end in ends:
+                if end and (start in syms_out or start in trace_sections_mangled) and end in syms_out:
+                    lvl1print(f'{mangle(start)} -> {mangle(end)};')
+
+        for sec in trace_sections:
+            lvl1print(f'{sec.replace(".", "_")} [label = "section {sec}"];')
+
--- a/controller/fw/tools/linksize.py
+++ b/controller/fw/tools/linksize.py
@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+
+def parse_linker_script(data):
+    pass
+
+def link(groups):
+    defined_symbols = {}
+    undefined_symbols = set()
+    for group, files in groups:
+        while True:
+            found_something = False
+
+            for fn in files:
+                symbols = load_symbols(fn)
+                for symbol in symbols:
+                    if symbol in defined_symbols:
+
+            if not group or not found_something:
+                break
+
+
+if __name__ == '__main__':
+
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-T', '--script', type=str, help='Linker script to use')
+    parser.add_argument('-o', '--output', type=str, help='Output file to produce')
+    args, rest = parser.parse_known_intermixed_args()
+    print(rest)
+
+    addprefix = lambda *xs: [ prefix + opt for opt in xs for prefix in ('', '-Wl,') ]
+    START_GROUP = addprefix('-(', '--start-group')
+    END_GROUP = addprefix('-)', '--end-group')
+    GROUP_OPTS = [*START_GROUP, *END_GROUP]
+    input_files = [ arg for arg in rest if not arg.startswith('-') or arg in GROUP_OPTS ]
+
+    def input_file_iter(input_files):
+        group = False
+        files = []
+        for arg in input_files:
+            if arg in START_GROUP:
+                assert not group
+
+                if files:
+                    yield False, files # nested -Wl,--start-group
+                group, files = True, []
+
+            elif arg in END_GROUP:
+                assert group # missing -Wl,--start-group
+                if files:
+                    yield True, files
+                group, files = False, []
+
+            else:
+                files.append(arg)
+
+        assert not group # missing -Wl,--end-group
+        if files:
+            yield False, files
+
+
+
--- a/controller/fw/tools/linktracer.py
+++ b/controller/fw/tools/linktracer.py
@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+
+import re
+import subprocess
+import tempfile
+import pprint
+
+ARCHIVE_RE = r'([^(]*)(\([^)]*\))?'
+
+def trace_source_files(linker, cmdline):
+    with tempfile.NamedTemporaryFile() as mapfile:
+        output = subprocess.check_output([linker, f'-Wl,--Map={mapfile.name}', *cmdline])
+
+        # intentionally use generator here
+        idx = 0 
+        lines = [ line.rstrip() for line in mapfile.read().decode().splitlines() if line.strip() ]
+        
+        for idx, line in enumerate(lines[idx:], start=idx):
+            #print('Dropping', line)
+            if line == 'Linker script and memory map':
+                break
+
+        idx += 1
+        objects = []
+        symbols = {}
+        sections = {}
+        current_object = None
+        last_offset = None
+        last_symbol = None
+        cont_sec = None
+        cont_ind = None
+        current_section = None
+        for idx, line in enumerate(lines[idx:], start=idx):
+            print(f'Processing >{line}')
+            if line.startswith('LOAD'):
+                _load, obj = line.split()
+                objects.append(obj)
+                continue
+
+            if line.startswith('OUTPUT'):
+                break
+
+            m = re.match(r'^( ?)([^ ]+)? +(0x[0-9a-z]+) +(0x[0-9a-z]+)?(.*)?$', line)
+            if m is None:
+                m = re.match(r'^( ?)([^ ]+)?$', line)
+                if m:
+                    cont_ind, cont_sec = m.groups()
+                else:
+                    cont_ind, cont_sec = None, None
+                last_offset, last_symbol = None, None
+                continue
+            indent, sec, offx, size, sym_or_src = m.groups()
+            if sec is None:
+                sec = cont_sec
+                ind = cont_ind
+            cont_sec = None
+            cont_ind = None
+            print(f'vals: indent={indent} sec={sec} offx={offx} size={size} sym_or_src={sym_or_src}')
+            if not re.match('^[a-zA-Z_0-9<>():*]+$', sym_or_src):
+                continue
+
+            if indent == '':
+                print(f'Section: {sec} 0x{size:x}')
+                current_section = sec
+                sections[sec] = size
+                last_offset = None
+                last_symbol = None
+                continue
+
+            if offx is not None:
+                offx = int(offx, 16)
+            if size is not None:
+                size = int(size, 16)
+
+            if size is not None and sym_or_src is not None:
+                # archive/object line
+                archive, _member = re.match(ARCHIVE_RE, sym_or_src).groups()
+                current_object = archive
+                last_offset = offx
+            else:
+                if sym_or_src is not None:
+                    assert size is None
+                    if last_offset is not None:
+                        last_size = offx - last_offset
+                        symbols[last_symbol] = (last_size, current_section)
+                        print(f'Symbol: {last_symbol} 0x{last_size:x} @{current_section}')
+                    last_offset = offx
+                    last_symbol = sym_or_src
+
+        idx += 1
+
+        for idx, line in enumerate(lines[idx:], start=idx):
+            if line == 'Cross Reference Table':
+                break
+
+        idx += 1
+
+        # map which symbol was pulled from which object in the end
+        used_defs = {}
+        for line in lines:
+            *left, right = line.split()
+
+            archive, _member = re.match(ARCHIVE_RE, right).groups()
+            if left:
+                used_defs[''.join(left)] = archive
+
+        #pprint.pprint(symbols)
+
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('linker_binary')
+    parser.add_argument('linker_args', nargs=argparse.REMAINDER)
+    args = parser.parse_args()
+
+    source_files = trace_source_files(args.linker_binary, args.linker_args)
+
--- a/controller/fw/tools/mapparse.py
+++ b/controller/fw/tools/mapparse.py
@ -0,0 +1,129 @@
+
+import re
+from collections import defaultdict, namedtuple
+
+Section = namedtuple('Section', ['name', 'offset', 'objects'])
+ObjectEntry = namedtuple('ObjectEntry', ['filename', 'object', 'offset', 'size'])
+FileEntry = namedtuple('FileEntry', ['section', 'object', 'offset', 'length'])
+
+class Memory:
+    def __init__(self, name, origin, length, attrs=''):
+        self.name, self.origin, self.length, self.attrs = name, origin, length, attrs
+        self.sections = {}
+        self.files = defaultdict(lambda: [])
+        self.totals = defaultdict(lambda: 0)
+
+    def add_toplevel(self, name, offx, length):
+        self.sections[name] = Section(offx, length, [])
+
+    def add_obj(self, name, offx, length, fn, obj):
+        base_section, sep, subsec = name[1:].partition('.')
+        base_section = '.'+base_section
+        if base_section in self.sections:
+            sec = secname, secoffx, secobjs = self.sections[base_section]
+            secobjs.append(ObjectEntry(fn, obj, offx, length))
+        else:
+            sec = None
+        self.files[fn].append(FileEntry(sec, obj, offx, length))
+        self.totals[fn] += length
+
+class MapFile:
+    def __init__(self, s):
+        self._lines = s.splitlines()
+        self.memcfg = {}
+        self.defaultmem = Memory('default', 0, 0xffffffffffffffff)
+        self._parse()
+
+    def __getitem__(self, offx_or_name):
+        ''' Lookup a memory area by name or address '''
+        if offx_or_name in self.memcfg:
+            return self.memcfg[offx_or_name]
+
+        elif isinstance(offx_or_name, int):
+            for mem in self.memcfg.values():
+                if mem.origin <= offx_or_name < mem.origin+mem.length:
+                    return mem
+            else:
+                return self.defaultmem
+
+        raise ValueError('Invalid argument type for indexing')
+
+    def _skip(self, regex):
+        matcher = re.compile(regex)
+        for l in self:
+            if matcher.match(l):
+                break
+
+    def __iter__(self):
+        while self._lines:
+            yield self._lines.pop(0)
+
+    def _parse(self):
+        self._skip('^Memory Configuration')
+
+        # Parse memory segmentation info
+        self._skip('^Name')
+        for l in self:
+            if not l:
+                break
+            name, origin, length, *attrs = l.split()
+            if not name.startswith('*'):
+                self.memcfg[name] = Memory(name, int(origin, 16), int(length, 16), attrs[0] if attrs else '')
+
+        # Parse section information
+        toplevel_m = re.compile('^(\.[a-zA-Z0-9_.]+)\s+(0x[0-9a-fA-F]+)\s+(0x[0-9a-fA-F]+)')
+        secondlevel_m = re.compile('^ (\.[a-zA-Z0-9_.]+)\s+(0x[0-9a-fA-F]+)\s+(0x[0-9a-fA-F]+)\s+(.*)$')
+        secondlevel_linebreak_m = re.compile('^ (\.[a-zA-Z0-9_.]+)\n')
+        filelike = re.compile('^(/?[^()]*\.[a-zA-Z0-9-_]+)(\(.*\))?')
+        linebreak_section = None
+        for l in self:
+            # Toplevel section
+            match = toplevel_m.match(l)
+            if match:
+                name, offx, length = match.groups()
+                offx, length = int(offx, 16), int(length, 16)
+                self[offx].add_toplevel(name, offx, length)
+
+            match = secondlevel_linebreak_m.match(l)
+            if match:
+                linebreak_section, = match.groups()
+                continue
+
+            if linebreak_section:
+                l = ' {} {}'.format(linebreak_section, l)
+                linebreak_section = None
+
+            # Second-level section
+            match = secondlevel_m.match(l)
+            if match:
+                name, offx, length, misc = match.groups()
+                match = filelike.match(misc)
+                if match:
+                    fn, obj = match.groups()
+                    obj = obj.strip('()') if obj else None
+                    offx, length = int(offx, 16), int(length, 16)
+                    self[offx].add_obj(name, offx, length, fn, obj)
+
+
+if __name__ == '__main__':
+    import argparse
+    parser = argparse.ArgumentParser(description='Parser GCC map file')
+    parser.add_argument('mapfile', type=argparse.FileType('r'), help='The GCC .map file to parse')
+    parser.add_argument('-m', '--memory', type=str, help='The memory segments to print, comma-separated')
+    args = parser.parse_args()
+    mf = MapFile(args.mapfile.read())
+    args.mapfile.close()
+
+    mems = args.memory.split(',') if args.memory else mf.memcfg.keys()
+
+    for name in mems:
+        mem = mf.memcfg[name]
+        print('Symbols by file for memory', name)
+        for tot, fn in reversed(sorted( (tot, fn) for fn, tot in mem.totals.items() )):
+            print('    {:>8} {}'.format(tot, fn))
+            for length, offx, sec, obj in reversed(sorted(( (length, offx, sec, obj) for sec, obj, offx, length in
+                mem.files[fn] ), key=lambda e: e[0] )):
+                name = sec.name if sec else None
+                print('       {:>8} {:>#08x} {}'.format(length, offx, obj))
+        #print('{:>16} 0x{:016x} 0x{:016x} ({:>24}) {}'.format(name, origin, length, length, attrs))
+