xref: /optee_os/scripts/symbolize.py (revision 2a0d456fa09fba195ab615420d510a041398dbe5)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import errno
10import glob
11import os
12import re
13import subprocess
14import sys
15import termios
16
# Patterns used by Symbolizer.write() to classify each input line.

# Header line that introduces a call stack dump.
CALL_STACK_RE = re.compile('Call stack:')
# Captures the load address announced for the TEE core ('load_addr' group).
TEE_LOAD_ADDR_RE = re.compile(r'TEE load address @ (?P<load_addr>0x[0-9a-f]+)')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(
    r'[UEIDFM]/(TC|LD):(\?*|[0-9]*) [0-9]* +(?P<addr>0x[0-9a-f]+)')
# Captures the address reported by an abort message ('addr' group).
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One row of the region table: virtual address ('addr'), size ('size') and,
# when the optional flags part carries one, a bracketed ELF index ('elf_idx').
REGION_RE = re.compile(r'region +[0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{4} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One row of the ELF list: index ('idx'), UUID ('uuid') and load address
# ('load_addr').
# NOTE(review): the load_addr character class also accepts '-', which looks
# copied from the uuid group -- confirm whether that is intended.
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f\-]+)')
# Marker line that switches write() into function graph mode.
FUNC_GRAPH_RE = re.compile(r'Function graph')
# First hexadecimal address found in a function graph line ('addr' group).
GRAPH_ADDR_RE = re.compile(r'(?P<addr>0x[0-9a-f]+)')
# A closing brace in a function graph line; such lines are copied unchanged.
GRAPH_RE = re.compile(r'}')
32
# Long help text shown after the option list by `symbolize.py -h`; it is
# passed verbatim to argparse as the parser epilog.
epilog = '''
This script reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info. If the CROSS_COMPILE environment
variable is set, it is used as a prefix to the binutils tools. That is, the
script will invoke $(CROSS_COMPILE)addr2line etc. If it is not set however,
the prefix will be determined automatically for each ELF file based on its
architecture (arm-linux-gnueabihf-, aarch64-linux-gnu-). The resulting command
is then expected to be found in the user's PATH.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D

Also, this script reads function graph generated for OP-TEE user TA from
/tmp/ftrace-<ta_uuid>.out file and resolves function addresses to corresponding
symbols.

Sample usage:

  $ cat /tmp/ftrace-<ta_uuid>.out | scripts/symbolize.py -d <ta_uuid>.elf
  <paste function graph here>
  ^D
'''
77
78
def get_args():
    """Parse the command line and return the resulting argparse namespace.

    Two options are understood:

    * ``-d/--dir``: may be repeated and takes one or more values each time,
      so the parsed ``dir`` attribute is a list of lists (or None).
    * ``-s/--strip_path``: optional value; defaults to the current working
      directory and becomes None when given without an argument.
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed '
                'to decode a TEE Core or pseudo-TA abort, while '
                '<TA_uuid>.elf is required if a user-mode TA has '
                'crashed. For convenience, ELF files may also be '
                'given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: '
                  'current directory, use -s with no argument to show '
                  'full paths)')

    # The epilog is pre-formatted text, hence the raw description formatter.
    parser = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps or function graphs',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-d', '--dir', nargs='+', action='append',
                        help=dir_help)
    parser.add_argument('-s', '--strip_path', nargs='?',
                        default=os.getcwd(), help=strip_help)

    return parser.parse_args()
96
97
98class Symbolizer(object):
99    def __init__(self, out, dirs, strip_path):
100        self._out = out
101        self._dirs = dirs
102        self._strip_path = strip_path
103        self._addr2line = None
104        self.reset()
105
106    def my_Popen(self, cmd):
107        try:
108            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
109                                    stdout=subprocess.PIPE,
110                                    universal_newlines=True,
111                                    bufsize=1)
112        except OSError as e:
113            if e.errno == errno.ENOENT:
114                print("*** Error:{}: command not found".format(cmd[0]),
115                      file=sys.stderr)
116                sys.exit(1)
117
118    def get_elf(self, elf_or_uuid):
119        if not elf_or_uuid.endswith('.elf'):
120            elf_or_uuid += '.elf'
121        for d in self._dirs:
122            if d.endswith(elf_or_uuid) and os.path.isfile(d):
123                return d
124            elf = glob.glob(d + '/' + elf_or_uuid)
125            if elf:
126                return elf[0]
127
128    def set_arch(self, elf):
129        self._arch = os.getenv('CROSS_COMPILE')
130        if self._arch:
131            return
132        p = subprocess.Popen(['file', '-L', elf], stdout=subprocess.PIPE)
133        output = p.stdout.readlines()
134        p.terminate()
135        if b'ARM aarch64,' in output[0]:
136            self._arch = 'aarch64-linux-gnu-'
137        elif b'ARM,' in output[0]:
138            self._arch = 'arm-linux-gnueabihf-'
139
140    def arch_prefix(self, cmd, elf):
141        self.set_arch(elf)
142        if self._arch is None:
143            return ''
144        return self._arch + cmd
145
146    def spawn_addr2line(self, elf_name):
147        if elf_name is None:
148            return
149        if self._addr2line_elf_name is elf_name:
150            return
151        if self._addr2line:
152            self._addr2line.terminate
153            self._addr2line = None
154        elf = self.get_elf(elf_name)
155        if not elf:
156            return
157        cmd = self.arch_prefix('addr2line', elf)
158        if not cmd:
159            return
160        self._addr2line = self.my_Popen([cmd, '-f', '-p', '-e', elf])
161        self._addr2line_elf_name = elf_name
162
163    # If addr falls into a region that maps a TA ELF file, return the load
164    # address of that file.
165    def elf_load_addr(self, addr):
166        if self._regions:
167            for r in self._regions:
168                r_addr = int(r[0], 16)
169                r_size = int(r[1], 16)
170                i_addr = int(addr, 16)
171                if (i_addr >= r_addr and i_addr < (r_addr + r_size)):
172                    # Found region
173                    elf_idx = r[2]
174                    if elf_idx is not None:
175                        return self._elfs[int(elf_idx)][1]
176            # In case address is not found in TA ELF file, fallback to tee.elf
177            # especially to symbolize mixed (user-space and kernel) addresses
178            # which is true when syscall ftrace is enabled along with TA
179            # ftrace.
180            return self._tee_load_addr
181        else:
182            # tee.elf
183            return self._tee_load_addr
184
185    def elf_for_addr(self, addr):
186        l_addr = self.elf_load_addr(addr)
187        if l_addr == self._tee_load_addr:
188            return 'tee.elf'
189        for k in self._elfs:
190            e = self._elfs[k]
191            if int(e[1], 16) == int(l_addr, 16):
192                return e[0]
193        return None
194
195    def subtract_load_addr(self, addr):
196        l_addr = self.elf_load_addr(addr)
197        if l_addr is None:
198            return None
199        if int(l_addr, 16) > int(addr, 16):
200            return ''
201        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))
202
203    def resolve(self, addr):
204        reladdr = self.subtract_load_addr(addr)
205        self.spawn_addr2line(self.elf_for_addr(addr))
206        if not reladdr or not self._addr2line:
207            return '???'
208        if self.elf_for_addr(addr) == 'tee.elf':
209            reladdr = '0x{:x}'.format(int(reladdr, 16) +
210                                      int(self.first_vma('tee.elf'), 16))
211        try:
212            print(reladdr, file=self._addr2line.stdin)
213            ret = self._addr2line.stdout.readline().rstrip('\n')
214        except IOError:
215            ret = '!!!'
216        return ret
217
218    # Armv8.5 with Memory Tagging Extension (MTE)
219    def strip_armv85_mte_tag(self, addr):
220        i_addr = int(addr, 16)
221        i_addr &= ~(0xf << 56)
222        return '0x{:x}'.format(i_addr)
223
224    def symbol_plus_offset(self, addr):
225        ret = ''
226        prevsize = 0
227        addr = self.strip_armv85_mte_tag(addr)
228        reladdr = self.subtract_load_addr(addr)
229        elf_name = self.elf_for_addr(addr)
230        if elf_name is None:
231            return ''
232        elf = self.get_elf(elf_name)
233        cmd = self.arch_prefix('nm', elf)
234        if not reladdr or not elf or not cmd:
235            return ''
236        ireladdr = int(reladdr, 16)
237        nm = self.my_Popen([cmd, '--numeric-sort', '--print-size', elf])
238        for line in iter(nm.stdout.readline, ''):
239            try:
240                addr, size, _, name = line.split()
241            except ValueError:
242                # Size is missing
243                try:
244                    addr, _, name = line.split()
245                    size = '0'
246                except ValueError:
247                    # E.g., undefined (external) symbols (line = "U symbol")
248                    continue
249            iaddr = int(addr, 16)
250            isize = int(size, 16)
251            if iaddr == ireladdr:
252                ret = name
253                break
254            if iaddr < ireladdr and iaddr + isize >= ireladdr:
255                offs = ireladdr - iaddr
256                ret = name + '+' + str(offs)
257                break
258            if iaddr > ireladdr and prevsize == 0:
259                offs = iaddr + ireladdr
260                ret = prevname + '+' + str(offs)
261                break
262            prevsize = size
263            prevname = name
264        nm.terminate()
265        return ret
266
267    def section_plus_offset(self, addr):
268        ret = ''
269        reladdr = self.subtract_load_addr(addr)
270        elf_name = self.elf_for_addr(addr)
271        if elf_name is None:
272            return ''
273        elf = self.get_elf(elf_name)
274        cmd = self.arch_prefix('objdump', elf)
275        if not reladdr or not elf or not cmd:
276            return ''
277        iaddr = int(reladdr, 16)
278        objdump = self.my_Popen([cmd, '--section-headers', elf])
279        for line in iter(objdump.stdout.readline, ''):
280            try:
281                idx, name, size, vma, lma, offs, algn = line.split()
282            except ValueError:
283                continue
284            ivma = int(vma, 16)
285            isize = int(size, 16)
286            if ivma == iaddr:
287                ret = name
288                break
289            if ivma < iaddr and ivma + isize >= iaddr:
290                offs = iaddr - ivma
291                ret = name + '+' + str(offs)
292                break
293        objdump.terminate()
294        return ret
295
296    def process_abort(self, line):
297        ret = ''
298        match = re.search(ABORT_ADDR_RE, line)
299        addr = match.group('addr')
300        pre = match.start('addr')
301        post = match.end('addr')
302        sym = self.symbol_plus_offset(addr)
303        sec = self.section_plus_offset(addr)
304        if sym or sec:
305            ret += line[:pre]
306            ret += addr
307            if sym:
308                ret += ' ' + sym
309            if sec:
310                ret += ' ' + sec
311            ret += line[post:]
312        return ret
313
314    # Return all ELF sections with the ALLOC flag
315    def read_sections(self, elf_name):
316        if elf_name is None:
317            return
318        if elf_name in self._sections:
319            return
320        elf = self.get_elf(elf_name)
321        if not elf:
322            return
323        cmd = self.arch_prefix('objdump', elf)
324        if not elf or not cmd:
325            return
326        self._sections[elf_name] = []
327        objdump = self.my_Popen([cmd, '--section-headers', elf])
328        for line in iter(objdump.stdout.readline, ''):
329            try:
330                _, name, size, vma, _, _, _ = line.split()
331            except ValueError:
332                if 'ALLOC' in line:
333                    self._sections[elf_name].append([name, int(vma, 16),
334                                                     int(size, 16)])
335
336    def first_vma(self, elf_name):
337        self.read_sections(elf_name)
338        return '0x{:x}'.format(self._sections[elf_name][0][1])
339
340    def overlaps(self, section, addr, size):
341        sec_addr = section[1]
342        sec_size = section[2]
343        if not size or not sec_size:
344            return False
345        return ((addr <= (sec_addr + sec_size - 1)) and
346                ((addr + size - 1) >= sec_addr))
347
348    def sections_in_region(self, addr, size, elf_idx):
349        ret = ''
350        addr = self.subtract_load_addr(addr)
351        if not addr:
352            return ''
353        iaddr = int(addr, 16)
354        isize = int(size, 16)
355        elf = self._elfs[int(elf_idx)][0]
356        if elf is None:
357            return ''
358        self.read_sections(elf)
359        if elf not in self._sections:
360            return ''
361        for s in self._sections[elf]:
362            if self.overlaps(s, iaddr, isize):
363                ret += ' ' + s[0]
364        return ret
365
366    def reset(self):
367        self._call_stack_found = False
368        if self._addr2line:
369            self._addr2line.terminate()
370            self._addr2line = None
371        self._addr2line_elf_name = None
372        self._arch = None
373        self._saved_abort_line = ''
374        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
375        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
376        self._elfs = {0: ["tee.elf", 0]}  # {idx: [uuid, load_addr], ...}
377        self._tee_load_addr = '0x0'
378        self._func_graph_found = False
379        self._func_graph_skip_line = True
380
381    def pretty_print_path(self, path):
382        if self._strip_path:
383            return re.sub(re.escape(self._strip_path) + '/*', '', path)
384        return path
385
    def write(self, line):
        """Process one line of input and emit it, annotated when possible.

        The method is a small state machine driven by the module-level
        patterns:

        * inside a call stack, lines holding a stack address get the
          addr2line answer appended after the address; the first line
          without one ends the call stack and resets the state;
        * inside a function graph, the first address of a line is replaced
          by the first word of the addr2line answer;
        * region table and ELF list lines are recorded and held back; they
          are printed, with extra information, once 'Call stack:' is seen;
        * an abort line is remembered so that its address can be described
          when the call stack is reached.

        Lines that are not consumed by one of these cases are copied to the
        output unchanged.
        """
        if self._call_stack_found:
            match = re.search(STACK_ADDR_RE, line)
            if match:
                addr = match.group('addr')
                pre = match.start('addr')
                post = match.end('addr')
                self._out.write(line[:pre])
                self._out.write(addr)
                # The call stack contains return addresses (LR/ELR values).
                # Heuristic: subtract 2 to obtain the call site of the function
                # or the location of the exception. This value works for A64,
                # A32 as well as Thumb.
                pc = 0
                lr = int(addr, 16)
                if lr:
                    pc = lr - 2
                res = self.resolve('0x{:x}'.format(pc))
                res = self.pretty_print_path(res)
                self._out.write(' ' + res)
                self._out.write(line[post:])
                return
            else:
                # First line without a stack address: the call stack is over.
                self.reset()
        if self._func_graph_found:
            match = re.search(GRAPH_ADDR_RE, line)
            match_re = re.search(GRAPH_RE, line)
            if match:
                addr = match.group('addr')
                pre = match.start('addr')
                post = match.end('addr')
                self._out.write(line[:pre])
                res = self.resolve(addr)
                # Keep only the first space-separated word of the answer.
                res_arr = re.split(' ', res)
                self._out.write(res_arr[0])
                self._out.write(line[post:])
                self._func_graph_skip_line = False
                return
            elif match_re:
                # Closing brace: copy the line as is.
                self._out.write(line)
                return
            elif self._func_graph_skip_line:
                # No address has been seen yet in this graph: drop the line.
                return
            else:
                # Neither an address nor a brace: the graph is over.
                self.reset()
        match = re.search(REGION_RE, line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            addr = match.group('addr')
            size = match.group('size')
            elf_idx = match.group('elf_idx')
            self._regions.append([addr, size, elf_idx, line])
            return
        match = re.search(ELF_LIST_RE, line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            i = int(match.group('idx'))
            self._elfs[i] = [match.group('uuid'), match.group('load_addr'),
                             line]
            return
        match = re.search(TEE_LOAD_ADDR_RE, line)
        if match:
            self._tee_load_addr = match.group('load_addr')
        match = re.search(CALL_STACK_RE, line)
        if match:
            self._call_stack_found = True
            # Print the region table that was held back; regions that map an
            # ELF file get the names of the overlapping sections appended.
            if self._regions:
                for r in self._regions:
                    r_addr = r[0]
                    r_size = r[1]
                    elf_idx = r[2]
                    saved_line = r[3]
                    if elf_idx is None:
                        self._out.write(saved_line)
                    else:
                        self._out.write(saved_line.strip() +
                                        self.sections_in_region(r_addr,
                                                                r_size,
                                                                elf_idx) +
                                        '\n')
            # Print the ELF list that was held back, followed by the path of
            # each ELF file that could be located. Entries without a saved
            # line (fewer than three items) are skipped.
            if self._elfs:
                for k in self._elfs:
                    e = self._elfs[k]
                    if (len(e) >= 3):
                        # TA executable or library
                        self._out.write(e[2].strip())
                        elf = self.get_elf(e[0])
                        if elf:
                            rpath = os.path.realpath(elf)
                            path = self.pretty_print_path(rpath)
                            self._out.write(' (' + path + ')')
                        self._out.write('\n')
            # Here is a good place to resolve the abort address because we
            # have all the information we need
            if self._saved_abort_line:
                self._out.write(self.process_abort(self._saved_abort_line))
        match = re.search(FUNC_GRAPH_RE, line)
        if match:
            self._func_graph_found = True
        match = re.search(ABORT_ADDR_RE, line)
        if match:
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so we can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)
494
495    def flush(self):
496        self._out.flush()
497
498
def main():
    """Symbolize everything read from stdin and write it to stdout.

    When stdin is a terminal, echo is switched off while the dump is being
    read so that pasted text is not shown twice; the previous terminal
    settings are restored on exit.
    """
    args = get_args()
    # Flatten list in case -d is used several times *and* with multiple
    # arguments
    args.dirs = ([path for group in args.dir for path in group]
                 if args.dir else [])
    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)

    stdin_fd = sys.stdin.fileno()
    interactive = os.isatty(stdin_fd)
    if interactive:
        saved_attrs = termios.tcgetattr(stdin_fd)
        quiet_attrs = termios.tcgetattr(stdin_fd)
        quiet_attrs[3] &= ~termios.ECHO  # lflags
    try:
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, quiet_attrs)
        for line in sys.stdin:
            symbolizer.write(line)
    finally:
        symbolizer.flush()
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, saved_attrs)
524
525
# Run the symbolizer only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
528