xref: /optee_os/scripts/symbolize.py (revision 17be223af9df17df8f65a0978bb3c50aec8ded9c)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import errno
10import glob
11import os
12import re
13import subprocess
14import sys
15import termios
16
# Marks the start of a call stack dump. Once it has been seen, following lines
# are searched for a stack address (see STACK_ADDR_RE).
CALL_STACK_RE = re.compile('Call stack:')
# Captures the TEE core load address from "TEE load address @ 0x..." lines.
TEE_LOAD_ADDR_RE = re.compile(r'TEE load address @ (?P<load_addr>0x[0-9a-f]+)')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(
    r'[UEIDFM]/(TC|LD):(\?*|[0-9]*) [0-9]* +(?P<addr>0x[0-9a-f]+)')
# Captures the faulting address from "...-abort at address 0x..." lines.
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One line of the region table: captures the virtual address and size of the
# region and, when present, the ELF index given in square brackets after the
# flags.
REGION_RE = re.compile(r'region +[0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{4} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One line of the ELF list: "[<idx>] <uuid> @ <load address>".
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f\-]+)')
# Marks the start of a function graph dump.
FUNC_GRAPH_RE = re.compile(r'Function graph')
# Any hexadecimal address found in a function graph line.
GRAPH_ADDR_RE = re.compile(r'(?P<addr>0x[0-9a-f]+)')
# A closing brace; function graph lines that contain one but no address are
# written out unchanged.
GRAPH_RE = re.compile(r'}')
32
# Long help text: get_args() passes it to argparse as the parser epilog. It is
# kept verbatim because the parser uses RawDescriptionHelpFormatter.
epilog = '''
This scripts reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info. If the CROSS_COMPILE environment
variable is set, it is used as a prefix to the binutils tools. That is, the
script will invoke $(CROSS_COMPILE)addr2line etc. If it is not set however,
the prefix will be determined automatically for each ELF file based on its
architecture (arm-linux-gnueabihf-, aarch64-linux-gnu-). The resulting command
is then expected to be found in the user's PATH.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D

Also, this script reads function graph generated for OP-TEE user TA from
/tmp/ftrace-<ta_uuid>.out file and resolves function addresses to corresponding
symbols.

Sample usage:

  $ cat /tmp/ftrace-<ta_uuid>.out | scripts/symbolize.py -d <ta_uuid>.elf
  <paste function graph here>
  ^D
'''
77
78
def get_args():
    """Parse the command line and return the argparse namespace.

    Two options are defined:
      -d/--dir         may be repeated, each occurrence taking one or more
                       values, so the parsed value is a list of lists (or
                       None when the option is never given).
      -s/--strip_path  takes an optional value; it defaults to the current
                       working directory when the option is absent.
    """
    arg_parser = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps or function graphs',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    dir_help = ('Search for ELF file in DIR. tee.elf is needed '
                'to decode a TEE Core or pseudo-TA abort, while '
                '<TA_uuid>.elf is required if a user-mode TA has '
                'crashed. For convenience, ELF files may also be '
                'given.')
    arg_parser.add_argument('-d', '--dir', nargs='+', action='append',
                            help=dir_help)

    strip_help = ('Strip STRIP_PATH from file paths (default: '
                  'current directory, use -s with no argument to show '
                  'full paths)')
    arg_parser.add_argument('-s', '--strip_path', nargs='?',
                            default=os.getcwd(), help=strip_help)

    return arg_parser.parse_args()
96
97
class Symbolizer(object):
    """Line filter that adds symbol information to OP-TEE dumps.

    Lines are fed one at a time to write(). Region table and ELF list lines
    are held back and emitted, annotated, when the call stack header is
    reached; call stack and function graph addresses are resolved with
    addr2line; the abort address is annotated using nm and objdump.

    out:        file-like object receiving the output (write() and flush()
                are called on it)
    dirs:       list of directories searched for ELF files; paths to ELF
                files themselves are accepted too
    strip_path: prefix removed from the paths that get printed, or a false
                value to print paths unchanged
    """

    def __init__(self, out, dirs, strip_path):
        self._out = out
        self._dirs = dirs
        self._strip_path = strip_path
        # Must exist before reset(), which stops any running addr2line
        self._addr2line = None
        self.reset()

    def my_Popen(self, cmd):
        """Start cmd (an argv list) with text-mode, line-buffered pipes.

        Returns the subprocess.Popen object. Exits the script with status 1
        when the command does not exist; any other OSError is re-raised
        rather than being swallowed, since callers use the returned object
        unconditionally.
        """
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True,
                                    bufsize=1)
        except OSError as e:
            if e.errno == errno.ENOENT:
                print("*** Error:{}: command not found".format(cmd[0]),
                      file=sys.stderr)
                sys.exit(1)
            raise

    def get_elf(self, elf_or_uuid):
        """Return the path of the ELF file for a name or UUID, or None.

        '.elf' is appended to the name when missing. Each entry of the
        search list is accepted either as the file itself or as a directory
        (glob pattern) containing it; the first hit wins.
        """
        if not elf_or_uuid.endswith('.elf'):
            elf_or_uuid += '.elf'
        for d in self._dirs:
            if d.endswith(elf_or_uuid) and os.path.isfile(d):
                return d
            elf = glob.glob(d + '/' + elf_or_uuid)
            if elf:
                return elf[0]
        return None

    def set_arch(self):
        """Determine the binutils prefix once and cache it in self._arch.

        CROSS_COMPILE is used when set to a non-empty value. Otherwise the
        'file' utility is run on the ELF of entry 0 of the ELF table and the
        prefix is derived from its description. self._arch is left without
        a usable value when neither method succeeds.
        """
        if self._arch:
            return
        self._arch = os.getenv('CROSS_COMPILE')
        if self._arch:
            return
        elf = self.get_elf(self._elfs[0][0])
        if elf is None:
            return
        p = subprocess.Popen(['file', elf], stdout=subprocess.PIPE)
        # communicate() reads everything and reaps the child
        output = p.communicate()[0].splitlines()
        if not output:
            return
        if b'ARM aarch64,' in output[0]:
            self._arch = 'aarch64-linux-gnu-'
        elif b'ARM,' in output[0]:
            self._arch = 'arm-linux-gnueabihf-'

    def arch_prefix(self, cmd):
        """Return cmd with the toolchain prefix, or '' if it is unknown."""
        self.set_arch()
        if self._arch is None:
            return ''
        return self._arch + cmd

    def _stop_addr2line(self):
        """Terminate and reap the addr2line child process, if any."""
        if self._addr2line:
            self._addr2line.terminate()
            self._addr2line.wait()
            self._addr2line = None
        self._addr2line_elf_name = None

    def spawn_addr2line(self, elf_name):
        """Make sure self._addr2line is an addr2line process for elf_name.

        A process that is still running for the same ELF name is reused.
        Otherwise the previous process is stopped and a new one is started;
        self._addr2line is left as None when the ELF file or the tool prefix
        cannot be found. Nothing is done when elf_name is None.
        """
        if elf_name is None:
            return
        if (self._addr2line and self._addr2line_elf_name == elf_name and
                self._addr2line.poll() is None):
            return
        self._stop_addr2line()
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('addr2line')
        if not cmd:
            return
        self._addr2line = self.my_Popen([cmd, '-f', '-p', '-e', elf])
        self._addr2line_elf_name = elf_name

    # If addr falls into a region that maps a TA ELF file, return the load
    # address of that file.
    def elf_load_addr(self, addr):
        """Return the load address (hex string) of the ELF containing addr.

        addr is a hex string. Raises KeyError when a matching region names
        an ELF index that is not in the ELF table.
        """
        i_addr = int(addr, 16)
        for r in self._regions:
            r_addr = int(r[0], 16)
            r_size = int(r[1], 16)
            if r_addr <= i_addr < r_addr + r_size:
                # Found region
                elf_idx = r[2]
                if elf_idx is not None:
                    return self._elfs[int(elf_idx)][1]
        # In case address is not found in TA ELF file, fallback to tee.elf
        # especially to symbolize mixed (user-space and kernel) addresses
        # which is true when syscall ftrace is enabled along with TA
        # ftrace. This is also the result when there is no region table.
        return self._tee_load_addr

    def elf_for_addr(self, addr):
        """Return the ELF name (or UUID) that addr belongs to, or None."""
        l_addr = self.elf_load_addr(addr)
        if l_addr == self._tee_load_addr:
            return 'tee.elf'
        for k in self._elfs:
            e = self._elfs[k]
            if int(e[1], 16) == int(l_addr, 16):
                return e[0]
        return None

    def subtract_load_addr(self, addr):
        """Return addr relative to the load address of its ELF file.

        The result is a hex string, None when the load address is unknown,
        or '' when addr lies below the load address.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if int(l_addr, 16) > int(addr, 16):
            return ''
        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))

    def resolve(self, addr):
        """Return the addr2line answer for addr.

        Returns '???' when the address cannot be looked up (no relative
        address or no addr2line process) and '!!!' when talking to
        addr2line fails.
        """
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        self.spawn_addr2line(elf_name)
        if not reladdr or not self._addr2line:
            return '???'
        if elf_name == 'tee.elf':
            # For tee.elf the lookup address is the offset from the load
            # address plus the VMA of the first ALLOC section
            reladdr = '0x{:x}'.format(int(reladdr, 16) +
                                      int(self.first_vma('tee.elf'), 16))
        try:
            print(reladdr, file=self._addr2line.stdin)
            ret = self._addr2line.stdout.readline().rstrip('\n')
        except IOError:
            ret = '!!!'
        return ret

    @staticmethod
    def _symbol_for_offset(nm_lines, ireladdr):
        """Find the symbol covering ireladdr in numerically sorted nm output.

        nm_lines is an iterable of 'nm --numeric-sort --print-size' lines
        and ireladdr an integer address. Returns 'name' on an exact hit,
        'name+<decimal offset>' when the address lies within a symbol (the
        end of the range is inclusive) or right after a symbol that has no
        size, and '' when nothing matches.
        """
        prevname = None
        prevaddr = 0
        prevsize = 0
        for line in nm_lines:
            fields = line.split()
            if len(fields) == 4:
                addr, size, _, name = fields
            elif len(fields) == 3:
                # Size is missing
                addr, _, name = fields
                size = '0'
            else:
                # E.g., undefined (external) symbols (line = "U symbol")
                continue
            iaddr = int(addr, 16)
            isize = int(size, 16)
            if iaddr == ireladdr:
                return name
            if iaddr < ireladdr and iaddr + isize >= ireladdr:
                return name + '+' + str(ireladdr - iaddr)
            if iaddr > ireladdr:
                # Symbols are sorted by address, so no later line can match.
                # Attribute the address to the previous symbol if its extent
                # is unknown.
                if prevname is not None and prevsize == 0:
                    return prevname + '+' + str(ireladdr - prevaddr)
                return ''
            prevname, prevaddr, prevsize = name, iaddr, isize
        return ''

    def symbol_plus_offset(self, addr):
        """Return 'symbol' or 'symbol+offset' for addr, using nm.

        Returns '' when the ELF file or the nm tool is not available, or
        when no symbol matches.
        """
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('nm')
        if not reladdr or not elf or not cmd:
            return ''
        nm = self.my_Popen([cmd, '--numeric-sort', '--print-size', elf])
        try:
            return self._symbol_for_offset(iter(nm.stdout.readline, ''),
                                           int(reladdr, 16))
        finally:
            # The scan may stop before nm is done writing
            nm.terminate()
            nm.wait()

    def section_plus_offset(self, addr):
        """Return 'section' or 'section+offset' for addr, using objdump.

        The offset is printed in decimal and the end of a section's range
        is inclusive. Returns '' when the ELF file or objdump is not
        available, or when no section matches.
        """
        ret = ''
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not reladdr or not elf or not cmd:
            return ''
        iaddr = int(reladdr, 16)
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        for line in iter(objdump.stdout.readline, ''):
            try:
                _, name, size, vma, _, _, _ = line.split()
                ivma = int(vma, 16)
                isize = int(size, 16)
            except ValueError:
                # Not a section header line
                continue
            if ivma == iaddr:
                ret = name
                break
            if ivma < iaddr and ivma + isize >= iaddr:
                ret = name + '+' + str(iaddr - ivma)
                break
        objdump.terminate()
        objdump.wait()
        return ret

    def process_abort(self, line):
        """Return the abort line with symbol/section info after the address.

        Returns '' when neither a symbol nor a section could be found.
        line must match ABORT_ADDR_RE.
        """
        ret = ''
        match = re.search(ABORT_ADDR_RE, line)
        addr = match.group('addr')
        pre = match.start('addr')
        post = match.end('addr')
        sym = self.symbol_plus_offset(addr)
        sec = self.section_plus_offset(addr)
        if sym or sec:
            ret += line[:pre]
            ret += addr
            if sym:
                ret += ' ' + sym
            if sec:
                ret += ' ' + sec
            ret += line[post:]
        return ret

    # Return all ELF sections with the ALLOC flag
    def read_sections(self, elf_name):
        """Cache the ALLOC sections of elf_name in self._sections.

        Each cached entry is [name, vma, size]. Nothing is cached when the
        ELF file or objdump cannot be found.
        """
        if elf_name is None:
            return
        if elf_name in self._sections:
            return
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not elf or not cmd:
            return
        self._sections[elf_name] = []
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        name = None
        for line in iter(objdump.stdout.readline, ''):
            try:
                # Seven fields: presumably a section header line (index,
                # name, size, VMA, LMA, file offset, alignment)
                _, name, size, vma, _, _, _ = line.split()
            except ValueError:
                # Any other line. A line mentioning ALLOC is taken to be the
                # flags of the most recently seen header line.
                if 'ALLOC' in line and name is not None:
                    self._sections[elf_name].append([name, int(vma, 16),
                                                     int(size, 16)])
        objdump.wait()

    def first_vma(self, elf_name):
        """Return the VMA of the first ALLOC section of elf_name (hex str).

        Raises KeyError or IndexError when no section could be read.
        """
        self.read_sections(elf_name)
        return '0x{:x}'.format(self._sections[elf_name][0][1])

    def overlaps(self, section, addr, size):
        """Tell whether [addr, addr + size) intersects the given section.

        section is a [name, addr, size] entry; empty ranges never overlap.
        """
        sec_addr = section[1]
        sec_size = section[2]
        if not size or not sec_size:
            return False
        return ((addr <= (sec_addr + sec_size - 1)) and
                ((addr + size - 1) >= sec_addr))

    def sections_in_region(self, addr, size, elf_idx):
        """Return ' name' for each ALLOC section overlapping the region.

        addr and size are hex strings; elf_idx selects the ELF table entry.
        Returns '' when the sections cannot be determined.
        """
        ret = ''
        addr = self.subtract_load_addr(addr)
        if not addr:
            return ''
        iaddr = int(addr, 16)
        isize = int(size, 16)
        elf = self._elfs[int(elf_idx)][0]
        if elf is None:
            return ''
        self.read_sections(elf)
        if elf not in self._sections:
            return ''
        for s in self._sections[elf]:
            if self.overlaps(s, iaddr, isize):
                ret += ' ' + s[0]
        return ret

    def reset(self):
        """Forget everything learnt from the dump processed so far."""
        self._call_stack_found = False
        self._stop_addr2line()
        self._arch = None
        self._saved_abort_line = ''
        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
        # {idx: [uuid, load_addr], ...}; load addresses are hex strings
        self._elfs = {0: ["tee.elf", '0x0']}
        self._tee_load_addr = '0x0'
        self._func_graph_found = False
        self._func_graph_skip_line = True

    def pretty_print_path(self, path):
        """Return path with every occurrence of the strip prefix removed."""
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path

    def write(self, line):
        """Process one input line and write the result to the output."""
        if self._call_stack_found:
            match = re.search(STACK_ADDR_RE, line)
            if match:
                addr = match.group('addr')
                pre = match.start('addr')
                post = match.end('addr')
                self._out.write(line[:pre])
                self._out.write(addr)
                res = self.resolve(addr)
                res = self.pretty_print_path(res)
                self._out.write(' ' + res)
                self._out.write(line[post:])
                return
            else:
                # End of the call stack: start over and process this line
                # as ordinary input
                self.reset()
        if self._func_graph_found:
            match = re.search(GRAPH_ADDR_RE, line)
            match_re = re.search(GRAPH_RE, line)
            if match:
                addr = match.group('addr')
                pre = match.start('addr')
                post = match.end('addr')
                self._out.write(line[:pre])
                res = self.resolve(addr)
                # The address is replaced by the first word of the answer
                res_arr = re.split(' ', res)
                self._out.write(res_arr[0])
                self._out.write(line[post:])
                self._func_graph_skip_line = False
                return
            elif match_re:
                self._out.write(line)
                return
            elif self._func_graph_skip_line:
                # Drop lines until the first address of the graph is seen
                return
            else:
                self.reset()
        match = re.search(REGION_RE, line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            addr = match.group('addr')
            size = match.group('size')
            elf_idx = match.group('elf_idx')
            self._regions.append([addr, size, elf_idx, line])
            return
        match = re.search(ELF_LIST_RE, line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            i = int(match.group('idx'))
            self._elfs[i] = [match.group('uuid'), match.group('load_addr'),
                             line]
            return
        match = re.search(TEE_LOAD_ADDR_RE, line)
        if match:
            self._tee_load_addr = match.group('load_addr')
        match = re.search(CALL_STACK_RE, line)
        if match:
            self._call_stack_found = True
            if self._regions:
                for r in self._regions:
                    r_addr = r[0]
                    r_size = r[1]
                    elf_idx = r[2]
                    saved_line = r[3]
                    if elf_idx is None:
                        self._out.write(saved_line)
                    else:
                        self._out.write(saved_line.strip() +
                                        self.sections_in_region(r_addr,
                                                                r_size,
                                                                elf_idx) +
                                        '\n')
            if self._elfs:
                for k in self._elfs:
                    e = self._elfs[k]
                    if (len(e) >= 3):
                        # TA executable or library
                        self._out.write(e[2].strip())
                        elf = self.get_elf(e[0])
                        if elf:
                            rpath = os.path.realpath(elf)
                            path = self.pretty_print_path(rpath)
                            self._out.write(' (' + path + ')')
                        self._out.write('\n')
            # Here is a good place to resolve the abort address because we
            # have all the information we need
            if self._saved_abort_line:
                self._out.write(self.process_abort(self._saved_abort_line))
        match = re.search(FUNC_GRAPH_RE, line)
        if match:
            self._func_graph_found = True
        match = re.search(ABORT_ADDR_RE, line)
        if match:
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so We can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)

    def flush(self):
        """Flush the output stream."""
        self._out.flush()
485
486
def main():
    """Symbolize everything read from stdin and write it to stdout.

    When stdin is a terminal, echo is switched off while reading so that a
    pasted dump is not displayed twice; the original terminal settings are
    restored on the way out.
    """
    args = get_args()
    # Flatten list in case -d is used several times *and* with multiple
    # arguments (args.dir is then a list of lists; it is None without -d)
    args.dirs = []
    for group in args.dir or []:
        args.dirs.extend(group)
    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)

    stdin_fd = sys.stdin.fileno()
    interactive = os.isatty(stdin_fd)
    if interactive:
        saved_attrs = termios.tcgetattr(stdin_fd)
        quiet_attrs = termios.tcgetattr(stdin_fd)
        quiet_attrs[3] &= ~termios.ECHO  # lflags
    try:
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, quiet_attrs)
        for line in sys.stdin:
            symbolizer.write(line)
    finally:
        symbolizer.flush()
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, saved_attrs)
512
513
# Run the symbolizer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
516