xref: /optee_os/scripts/symbolize.py (revision 18c5148d357e51235bc842b7826ff6e8da109902)
1#!/usr/bin/env python
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
import argparse
import errno
import glob
import os
import re
import subprocess
import sys
14
# Marks the line that introduces a call stack dump.
CALL_STACK_RE = re.compile(r'Call stack:')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(
    r'[UEIDFM]/T[AC]:(\?|[0-9]+) [0-9]* +(?P<addr>0x[0-9a-f]+)')
# Faulting address of an abort message, e.g.:
# E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One entry of the memory region table. The trailing "flags ... [idx]" part
# is optional; when present, elf_idx is the index of the ELF mapped there.
REGION_RE = re.compile(r'region [0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{6} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One entry of the ELF list: "[idx] <uuid> @ <load address>". The load
# address is plain hexadecimal so that it can always be fed to int(x, 16).
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f]+)')
26
# Long help text shown after the option list by argparse (see get_args()).
epilog = '''
This script reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info. If the CROSS_COMPILE environment
variable is set, it is used as a prefix to the binutils tools. That is, the
script will invoke $(CROSS_COMPILE)addr2line etc. If it is not set however,
the prefix will be determined automatically for each ELF file based on its
architecture (arm-linux-gnueabihf-, aarch64-linux-gnu-). The resulting command
is then expected to be found in the user's PATH.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D
'''
61
62
def get_args():
    """Parse the command line of the script.

    Returns the argparse namespace. Its 'dir' attribute is None or a list
    of lists (one inner list per -d occurrence); 'strip_path' is the
    current directory by default and None when -s is given without a value.
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed '
                'to decode a TEE Core or pseudo-TA abort, while '
                '<TA_uuid>.elf is required if a user-mode TA has '
                'crashed. For convenience, ELF files may also be '
                'given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: '
                  'current directory, use -s with no argument to show '
                  'full paths)')

    # RawDescriptionHelpFormatter keeps the line breaks of the epilog text
    arg_parser = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('-d', '--dir', nargs='+', action='append',
                            help=dir_help)
    arg_parser.add_argument('-s', '--strip_path', nargs='?',
                            default=os.getcwd(), help=strip_help)

    return arg_parser.parse_args()
80
81
class Symbolizer(object):
    """Line filter that adds debug information to OP-TEE abort/panic dumps.

    The dump is fed one line at a time to write(). Region table and ELF list
    lines are held back until the 'Call stack:' line is reached; they are
    then printed (annotated), followed by the annotated abort line, and each
    subsequent call stack address gets the addr2line output appended.

    out        -- object with write() and flush() receiving the output
    dirs       -- list of directories (or ELF file paths) searched for ELFs
    strip_path -- prefix removed from printed paths (None/'' keeps them)
    """

    def __init__(self, out, dirs, strip_path):
        self._out = out
        self._dirs = dirs
        self._strip_path = strip_path
        self._addr2line = None
        self.reset()

    def my_Popen(self, cmd):
        """Start cmd (an argv list) with text-mode stdin/stdout pipes.

        Exits the script with status 1 and a message on stderr when the
        command does not exist; any other OSError is propagated.
        """
        try:
            # Text mode + line buffering: callers exchange whole lines with
            # the child and compare its output against str literals.
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True, bufsize=1)
        except OSError as e:
            if e.errno == errno.ENOENT:
                sys.stderr.write('*** Error: ' + cmd[0] +
                                 ': command not found\n')
                sys.exit(1)
            # Do not hide other failures behind a None return value
            raise

    def _tool_output(self, cmd):
        """Run cmd to completion and return its stdout as a list of lines."""
        proc = self.my_Popen(cmd)
        # communicate() drains the pipe, closes it and reaps the child
        output, _ = proc.communicate()
        return output.splitlines()

    def get_elf(self, elf_or_uuid):
        """Return the path of the ELF file for a name or UUID, or None.

        '.elf' is appended to the name when missing. Each entry of the
        search list may be the ELF file itself or a directory containing it.
        """
        if not elf_or_uuid.endswith('.elf'):
            elf_or_uuid += '.elf'
        for d in self._dirs:
            if d.endswith(elf_or_uuid) and os.path.isfile(d):
                return d
            elf = glob.glob(os.path.join(d, elf_or_uuid))
            if elf:
                return elf[0]
        return None

    def set_arch(self):
        """Determine the binutils prefix and cache it in self._arch.

        CROSS_COMPILE wins when set. Otherwise the 'file' utility is run on
        the ELF with index 0 and the prefix is chosen from its description.
        self._arch is left unset when nothing could be determined.
        """
        if self._arch:
            return
        self._arch = os.getenv('CROSS_COMPILE')
        if self._arch:
            return
        elf = self.get_elf(self._elfs[0][0])
        if elf is None:
            return
        output = self._tool_output(['file', elf])
        description = output[0] if output else ''
        if 'ARM aarch64,' in description:
            self._arch = 'aarch64-linux-gnu-'
        elif 'ARM,' in description:
            self._arch = 'arm-linux-gnueabihf-'

    def arch_prefix(self, cmd):
        """Return cmd with the toolchain prefix, or '' if it is unknown."""
        self.set_arch()
        if self._arch is None:
            return ''
        return self._arch + cmd

    def _close_addr2line(self):
        """Stop the running addr2line process (if any) and forget it."""
        proc = self._addr2line
        self._addr2line = None
        # Keep the cached name in sync with the process: a stale name would
        # make spawn_addr2line() believe a process is still available.
        self._addr2line_elf_name = None
        if proc is None:
            return
        try:
            proc.terminate()
        except OSError:
            pass  # the process is already gone
        for pipe in (proc.stdin, proc.stdout):
            try:
                pipe.close()
            except (IOError, OSError):
                pass
        proc.wait()

    def spawn_addr2line(self, elf_name):
        """Make sure self._addr2line is an addr2line process for elf_name.

        The running process is reused when it already serves elf_name. When
        the ELF file or the tool prefix cannot be found, no process is left
        running and self._addr2line is None.
        """
        if elf_name is None:
            return
        if self._addr2line_elf_name == elf_name:
            return
        self._close_addr2line()
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('addr2line')
        if not cmd:
            return
        self._addr2line = self.my_Popen([cmd, '-f', '-p', '-e', elf])
        self._addr2line_elf_name = elf_name

    # If addr falls into a region that maps a TA ELF file, return the load
    # address of that file.
    def elf_load_addr(self, addr):
        """Return the load address (hex string) of the ELF containing addr.

        Without a region table everything is assumed to belong to tee.elf
        and '0x0' is returned. With a region table, None is returned when
        addr is in no region that maps a known ELF.
        """
        if not self._regions:
            # tee.elf
            return '0x0'
        i_addr = int(addr, 16)
        for r in self._regions:
            r_addr = int(r[0], 16)
            r_size = int(r[1], 16)
            if r_addr <= i_addr < r_addr + r_size:
                # Found region
                elf_idx = r[2]
                if elf_idx is not None:
                    # The index may refer to an ELF that was never listed
                    elf = self._elfs.get(int(elf_idx))
                    if elf is not None:
                        return elf[1]
        return None

    def elf_for_addr(self, addr):
        """Return the name ('tee.elf' or a UUID) of the ELF containing addr,
        or None when it cannot be determined."""
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if l_addr == '0x0':
            return 'tee.elf'
        i_l_addr = int(l_addr, 16)
        for k in self._elfs:
            e = self._elfs[k]
            if int(e[1], 16) == i_l_addr:
                return e[0]
        return None

    def subtract_load_addr(self, addr):
        """Return addr relative to the load address of its ELF (hex string).

        Returns None when the ELF is unknown and '' when the load address
        is above addr.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if int(l_addr, 16) > int(addr, 16):
            return ''
        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))

    def resolve(self, addr):
        """Return the addr2line description of addr.

        '???' is returned when the address cannot be looked up and '!!!'
        when talking to addr2line fails.
        """
        reladdr = self.subtract_load_addr(addr)
        self.spawn_addr2line(self.elf_for_addr(addr))
        if not reladdr or not self._addr2line:
            return '???'
        try:
            self._addr2line.stdin.write(reladdr + '\n')
            # The child only answers once the request has left our buffer
            self._addr2line.stdin.flush()
            ret = self._addr2line.stdout.readline().rstrip('\n')
        except (IOError, OSError):
            ret = '!!!'
        return ret

    @staticmethod
    def _symbol_at(nm_lines, target):
        """Find target in 'nm --numeric-sort --print-size' output.

        Returns 'name' on an exact match, 'name+<decimal offset>' when
        target lies inside a symbol (or after a symbol without size and
        before the next one), and '' otherwise.
        """
        prev_name = None
        prev_addr = 0
        prev_size = 0
        for line in nm_lines:
            fields = line.split()
            if len(fields) == 4:
                addr, size, _, name = fields
            elif len(fields) == 3:
                # Size is missing
                addr, _, name = fields
                size = '0'
            else:
                # E.g., undefined (external) symbols (line = "U symbol")
                continue
            try:
                iaddr = int(addr, 16)
                isize = int(size, 16)
            except ValueError:
                continue
            if iaddr == target:
                return name
            # A symbol covers [iaddr, iaddr + isize): the byte right after
            # it belongs to whatever comes next.
            if iaddr < target < iaddr + isize:
                return name + '+' + str(target - iaddr)
            if iaddr > target:
                # The list is sorted, so no later symbol can match. Fall
                # back to the previous symbol if its extent is unknown.
                if prev_name is not None and prev_size == 0:
                    return prev_name + '+' + str(target - prev_addr)
                return ''
            prev_name = name
            prev_addr = iaddr
            prev_size = isize
        return ''

    def symbol_plus_offset(self, addr):
        """Return 'symbol' or 'symbol+offset' for addr using nm, '' when
        the ELF, the tool or the symbol cannot be found."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('nm')
        if not reladdr or not elf or not cmd:
            return ''
        lines = self._tool_output([cmd, '--numeric-sort', '--print-size',
                                   elf])
        return self._symbol_at(lines, int(reladdr, 16))

    @staticmethod
    def _parse_section_headers(objdump_lines):
        """Parse 'objdump --section-headers' output.

        Returns a list of [name, vma, size, alloc] entries in file order.
        A section is described by a 7-field line (idx, name, size, vma,
        lma, file offset, alignment); alloc is True when a following line
        mentions ALLOC before the next section starts.
        """
        sections = []
        for line in objdump_lines:
            fields = line.split()
            if len(fields) == 7:
                try:
                    sections.append([fields[1], int(fields[3], 16),
                                     int(fields[2], 16), False])
                    continue
                except ValueError:
                    # Seven words but no numbers: not a section line
                    pass
            if sections and 'ALLOC' in line:
                sections[-1][3] = True
        return sections

    @staticmethod
    def _section_at(objdump_lines, target):
        """Return 'section' or 'section+<decimal offset>' for target based
        on 'objdump --section-headers' output, or '' if no section fits."""
        for name, vma, size, _ in Symbolizer._parse_section_headers(
                objdump_lines):
            if vma == target:
                return name
            # Same half-open convention as overlaps(): the last byte of a
            # section is vma + size - 1.
            if vma < target < vma + size:
                return name + '+' + str(target - vma)
        return ''

    def section_plus_offset(self, addr):
        """Return 'section' or 'section+offset' for addr using objdump, ''
        when the ELF, the tool or the section cannot be found."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not reladdr or not elf or not cmd:
            return ''
        lines = self._tool_output([cmd, '--section-headers', elf])
        return self._section_at(lines, int(reladdr, 16))

    def process_abort(self, line):
        """Return the abort line with symbol and section information
        inserted after the faulting address.

        Returns '' when neither could be determined (or when line carries
        no abort address).
        """
        match = ABORT_ADDR_RE.search(line)
        if not match:
            return ''
        addr = match.group('addr')
        sym = self.symbol_plus_offset(addr)
        sec = self.section_plus_offset(addr)
        if not sym and not sec:
            return ''
        ret = line[:match.start('addr')] + addr
        if sym:
            ret += ' ' + sym
        if sec:
            ret += ' ' + sec
        return ret + line[match.end('addr'):]

    # Cache all sections of an ELF that have the ALLOC flag
    def read_sections(self, elf_name):
        """Fill self._sections[elf_name] with [name, vma, size] entries for
        the ALLOC sections of the ELF. Does nothing when the entry already
        exists or when the ELF or objdump cannot be found."""
        if elf_name is None:
            return
        if elf_name in self._sections:
            return
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not elf or not cmd:
            return
        lines = self._tool_output([cmd, '--section-headers', elf])
        self._sections[elf_name] = [
            [name, vma, size]
            for name, vma, size, alloc in self._parse_section_headers(lines)
            if alloc]

    def overlaps(self, section, addr, size):
        """Tell whether [addr, addr + size) intersects section, a
        [name, addr, size] entry. Empty ranges never overlap."""
        sec_addr = section[1]
        sec_size = section[2]
        if not size or not sec_size:
            return False
        return ((addr <= (sec_addr + sec_size - 1)) and
                ((addr + size - 1) >= sec_addr))

    def sections_in_region(self, addr, size, elf_idx):
        """Return the names of the sections of ELF number elf_idx found in
        the region (addr, size: hex strings), each preceded by a space.

        Returns '' when the region or the ELF cannot be resolved.
        """
        ret = ''
        addr = self.subtract_load_addr(addr)
        if not addr:
            return ''
        iaddr = int(addr, 16)
        isize = int(size, 16)
        entry = self._elfs.get(int(elf_idx))
        if entry is None or entry[0] is None:
            return ''
        elf = entry[0]
        self.read_sections(elf)
        if elf not in self._sections:
            return ''
        for s in self._sections[elf]:
            if self.overlaps(s, iaddr, isize):
                ret += ' ' + s[0]
        return ret

    def reset(self):
        """Forget everything learnt from the dump processed so far."""
        self._call_stack_found = False
        self._close_addr2line()
        self._arch = None
        self._saved_abort_line = ''
        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
        # {idx: [uuid, load_addr(, saved line)], ...}; load addresses are
        # hex strings, like the ones captured from the ELF list
        self._elfs = {0: ["tee.elf", '0x0']}

    def pretty_print_path(self, path):
        """Return path with every occurrence of the strip prefix (and the
        slashes following it) removed."""
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path

    def write(self, line):
        """Process one line of the dump and emit the annotated output."""
        if self._call_stack_found:
            match = STACK_ADDR_RE.search(line)
            if match:
                addr = match.group('addr')
                self._out.write(line[:match.start('addr')])
                self._out.write(addr)
                res = self.pretty_print_path(self.resolve(addr))
                self._out.write(' ' + res)
                self._out.write(line[match.end('addr'):])
                return
            # First line after the call stack: start over
            self.reset()
        match = REGION_RE.search(line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            self._regions.append([match.group('addr'), match.group('size'),
                                  match.group('elf_idx'), line])
            return
        match = ELF_LIST_RE.search(line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            self._elfs[int(match.group('idx'))] = [match.group('uuid'),
                                                   match.group('load_addr'),
                                                   line]
            return
        if CALL_STACK_RE.search(line):
            self._call_stack_found = True
            for r in self._regions:
                r_addr, r_size, elf_idx, saved_line = r[0], r[1], r[2], r[3]
                if elf_idx is None:
                    self._out.write(saved_line)
                else:
                    self._out.write(saved_line.strip() +
                                    self.sections_in_region(r_addr, r_size,
                                                            elf_idx) +
                                    '\n')
            for k in sorted(self._elfs):
                e = self._elfs[k]
                if len(e) >= 3:
                    # TA executable or library
                    self._out.write(e[2].strip())
                    elf = self.get_elf(e[0])
                    if elf:
                        rpath = os.path.realpath(elf)
                        path = self.pretty_print_path(rpath)
                        self._out.write(' (' + path + ')')
                    self._out.write('\n')
            # Here is a good place to resolve the abort address because we
            # have all the information we need
            if self._saved_abort_line:
                self._out.write(self.process_abort(self._saved_abort_line))
        if ABORT_ADDR_RE.search(line):
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so we can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)

    def flush(self):
        """Flush the output object."""
        self._out.flush()
429
430
def main():
    """Symbolize the dump read from stdin and print it to stdout."""
    args = get_args()
    # -d may be given several times *and* with several values each time,
    # so args.dir is a list of lists (or None): flatten it
    args.dirs = []
    for group in args.dir or []:
        args.dirs.extend(group)

    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)
    for line in sys.stdin:
        symbolizer.write(line)
    symbolizer.flush()


if __name__ == "__main__":
    main()
448