xref: /optee_os/scripts/symbolize.py (revision ba84a3f5c136f3ed3fefe782c9666928b729bb52)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import errno
10import glob
11import os
12import re
13import subprocess
14import sys
15import termios
16
# Patterns used by Symbolizer.write() to recognise the interesting lines of
# an OP-TEE log. All addresses are expected as lowercase hex with a 0x prefix.

# Header line that announces a call stack dump.
CALL_STACK_RE = re.compile('Call stack:')
# Line reporting where the TEE core is loaded; captures the address.
TEE_LOAD_ADDR_RE = re.compile(r'TEE load address @ (?P<load_addr>0x[0-9a-f]+)')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(
    r'[UEIDFM]/(TC|LD):(\?*|[0-9]*) [0-9]* +(?P<addr>0x[0-9a-f]+)')
# Abort message (text ending in '-abort at address'); captures the faulting
# address.
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One entry of the region table: virtual address and size are captured, and
# the optional trailing '[N]' gives the index of the ELF mapped there.
REGION_RE = re.compile(r'region +[0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{4} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One entry of the ELF list: '[idx] <uuid> @ <load address>'.
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f\-]+)')
# Header line that announces a function graph dump.
FUNC_GRAPH_RE = re.compile(r'Function graph')
# Any hex address; used on function graph lines.
GRAPH_ADDR_RE = re.compile(r'(?P<addr>0x[0-9a-f]+)')
# Closing brace of a function graph scope; such lines are passed through.
GRAPH_RE = re.compile(r'}')
32
# Long help text handed to argparse as the epilog in get_args(); the parser
# uses RawDescriptionHelpFormatter, so the layout below is significant.
epilog = '''
This scripts reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info. If the CROSS_COMPILE environment
variable is set, it is used as a prefix to the binutils tools. That is, the
script will invoke $(CROSS_COMPILE)addr2line etc. If it is not set however,
the prefix will be determined automatically for each ELF file based on its
architecture (arm-linux-gnueabihf-, aarch64-linux-gnu-). The resulting command
is then expected to be found in the user's PATH.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D

Also, this script reads function graph generated for OP-TEE user TA from
/tmp/ftrace-<ta_uuid>.out file and resolves function addresses to corresponding
symbols.

Sample usage:

  $ cat /tmp/ftrace-<ta_uuid>.out | scripts/symbolize.py -d <ta_uuid>.elf
  <paste function graph here>
  ^D
'''
77
78
def get_args():
    """Build the command line parser and return the parsed arguments.

    The returned namespace has 'dir' (None, or a list of lists because the
    option is both repeatable and multi-valued) and 'strip_path' (current
    directory by default, None when -s is given without a value).
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed '
                'to decode a TEE Core or pseudo-TA abort, while '
                '<TA_uuid>.elf is required if a user-mode TA has '
                'crashed. For convenience, ELF files may also be '
                'given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: '
                  'current directory, use -s with no argument to show '
                  'full paths)')

    parser = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps or function graphs',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-d', '--dir', nargs='+', action='append',
                        help=dir_help)
    parser.add_argument('-s', '--strip_path', nargs='?',
                        default=os.getcwd(), help=strip_help)

    args = parser.parse_args()
    return args
96
97
class Symbolizer(object):
    """Stream filter that adds debug information to OP-TEE log lines.

    Lines are fed one at a time to write(). Region table and ELF list
    entries are held back, and abort lines are remembered, until a
    'Call stack:' line is seen; at that point they are printed with extra
    details and every following stack address is resolved with addr2line.
    Function graph dumps have their addresses replaced by symbol names.
    The lookups are delegated to external binutils commands (addr2line,
    nm, objdump), optionally prefixed by $CROSS_COMPILE.
    """

    def __init__(self, out, dirs, strip_path):
        """Create a symbolizer.

        out: file-like object receiving the processed lines.
        dirs: list of directories (or ELF file paths) searched for ELFs.
        strip_path: prefix removed from reported file paths, or None/''
        to keep full paths.
        """
        self._out = out
        self._dirs = dirs
        self._strip_path = strip_path
        # Must exist before reset(), which inspects it
        self._addr2line = None
        self.reset()

    @staticmethod
    def _reap(proc):
        """Stop child process 'proc', close its pipes and collect it."""
        if proc is None:
            return
        try:
            proc.terminate()
        except OSError:
            # The process is already gone
            pass
        for pipe in (proc.stdin, proc.stdout):
            if pipe is None:
                continue
            try:
                pipe.close()
            except OSError:
                # E.g., broken pipe while flushing buffered input
                pass
        proc.wait()

    def my_Popen(self, cmd):
        """Start 'cmd' (argument list) with text-mode stdin/stdout pipes.

        Exits the script with status 1 if the command does not exist. Any
        other OSError is propagated to the caller.
        """
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True,
                                    bufsize=1)
        except OSError as e:
            if e.errno == errno.ENOENT:
                print("*** Error:{}: command not found".format(cmd[0]),
                      file=sys.stderr)
                sys.exit(1)
            # Do not hide other failures (e.g., permission denied) behind
            # an implicit None return value
            raise

    def get_elf(self, elf_or_uuid):
        """Return the path of the ELF file for a name or UUID, or None.

        '.elf' is appended when missing. Each entry of the search list is
        either the ELF file itself or a directory containing it.
        """
        if not elf_or_uuid.endswith('.elf'):
            elf_or_uuid += '.elf'
        for d in self._dirs:
            if d.endswith(elf_or_uuid) and os.path.isfile(d):
                return d
            elf = glob.glob(d + '/' + elf_or_uuid)
            if elf:
                return elf[0]
        return None

    def set_arch(self):
        """Determine the binutils prefix if it is not known yet.

        $CROSS_COMPILE is used when set. Otherwise the 'file' command is
        run on the ELF at index 0 of the ELF list and the prefix is derived
        from the reported architecture.
        """
        if self._arch:
            return
        self._arch = os.getenv('CROSS_COMPILE')
        if self._arch:
            return
        elf = self.get_elf(self._elfs[0][0])
        if elf is None:
            return
        with subprocess.Popen(['file', elf], stdout=subprocess.PIPE) as p:
            output = p.communicate()[0]
        lines = output.splitlines()
        if not lines:
            return
        if b'ARM aarch64,' in lines[0]:
            self._arch = 'aarch64-linux-gnu-'
        elif b'ARM,' in lines[0]:
            self._arch = 'arm-linux-gnueabihf-'

    def arch_prefix(self, cmd):
        """Return 'cmd' with the toolchain prefix, or '' if none is known."""
        self.set_arch()
        if self._arch is None:
            return ''
        return self._arch + cmd

    def spawn_addr2line(self, elf_name):
        """Make sure the running addr2line process serves 'elf_name'.

        Nothing is done when elf_name is None or already being served. On
        failure to locate the ELF or the command, no process is left
        running.
        """
        if elf_name is None:
            return
        if self._addr2line_elf_name == elf_name:
            return
        if self._addr2line:
            self._reap(self._addr2line)
            self._addr2line = None
            # Forget the name too, otherwise a later request for the old
            # ELF would be mistaken for "already running"
            self._addr2line_elf_name = None
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('addr2line')
        if not cmd:
            return
        self._addr2line = self.my_Popen([cmd, '-f', '-p', '-e', elf])
        self._addr2line_elf_name = elf_name

    # If addr falls into a region that maps a TA ELF file, return the load
    # address of that file.
    def elf_load_addr(self, addr):
        """Return the load address (hex string) of the ELF holding addr."""
        i_addr = int(addr, 16)
        for r in self._regions:
            r_addr = int(r[0], 16)
            r_size = int(r[1], 16)
            if r_addr <= i_addr < r_addr + r_size:
                # Found region
                elf_idx = r[2]
                if elf_idx is not None:
                    # The index may be absent from the ELF list
                    entry = self._elfs.get(int(elf_idx))
                    if entry is not None:
                        return entry[1]
        # In case address is not found in TA ELF file, fallback to tee.elf
        # especially to symbolize mixed (user-space and kernel) addresses
        # which is true when syscall ftrace is enabled along with TA
        # ftrace.
        return self._tee_load_addr

    def elf_for_addr(self, addr):
        """Return the name (or UUID) of the ELF holding addr, or None."""
        l_addr = self.elf_load_addr(addr)
        if l_addr == self._tee_load_addr:
            return 'tee.elf'
        i_l_addr = int(l_addr, 16)
        for e in self._elfs.values():
            if int(e[1], 16) == i_l_addr:
                return e[0]
        return None

    def subtract_load_addr(self, addr):
        """Return addr relative to its ELF load address, as a hex string.

        '' is returned when addr is below the load address.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        offset = int(addr, 16) - int(l_addr, 16)
        if offset < 0:
            return ''
        return '0x{:x}'.format(offset)

    def resolve(self, addr):
        """Return what addr2line prints for addr.

        '???' is returned when the address cannot be looked up and '!!!'
        when talking to addr2line fails.
        """
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        self.spawn_addr2line(elf_name)
        if not reladdr or not self._addr2line:
            return '???'
        if elf_name == 'tee.elf':
            reladdr = '0x{:x}'.format(int(reladdr, 16) +
                                      int(self.first_vma('tee.elf'), 16))
        try:
            print(reladdr, file=self._addr2line.stdin)
            ret = self._addr2line.stdout.readline().rstrip('\n')
        except IOError:
            ret = '!!!'
        return ret

    @staticmethod
    def _lookup_symbol(nm_lines, target):
        """Find the symbol covering address 'target' in nm output.

        nm_lines: iterable of lines from 'nm --numeric-sort --print-size'.
        Returns 'name', 'name+offset' (decimal offset) or '' if not found.
        """
        prev_name = None
        prev_addr = 0
        prev_size = 0
        for line in nm_lines:
            fields = line.split()
            if len(fields) == 4:
                addr, size, _, name = fields
            elif len(fields) == 3:
                # Size is missing
                addr, _, name = fields
                size = '0'
            else:
                # E.g., undefined (external) symbols (line = "U symbol")
                continue
            iaddr = int(addr, 16)
            isize = int(size, 16)
            if iaddr == target:
                return name
            if iaddr < target <= iaddr + isize:
                return name + '+' + str(target - iaddr)
            if iaddr > target:
                # Symbols are sorted, nothing further can match. The best
                # guess is the previous symbol, provided it has no size
                # (otherwise target is known to be outside of it).
                if prev_name is not None and prev_size == 0:
                    return prev_name + '+' + str(target - prev_addr)
                return ''
            prev_name = name
            prev_addr = iaddr
            prev_size = isize
        return ''

    def symbol_plus_offset(self, addr):
        """Return 'symbol[+offset]' for addr using nm, or '' if unknown."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('nm')
        if not reladdr or not elf or not cmd:
            return ''
        nm = self.my_Popen([cmd, '--numeric-sort', '--print-size', elf])
        try:
            return self._lookup_symbol(iter(nm.stdout.readline, ''),
                                       int(reladdr, 16))
        finally:
            self._reap(nm)

    @staticmethod
    def _parse_section_header(line):
        """Parse one line of 'objdump --section-headers' output.

        Returns (name, vma, size) with integer vma and size for a section
        header line (7 fields, the first one being the section index), or
        None for any other line.
        """
        fields = line.split()
        if len(fields) != 7 or not fields[0].isdigit():
            return None
        try:
            return fields[1], int(fields[3], 16), int(fields[2], 16)
        except ValueError:
            return None

    def section_plus_offset(self, addr):
        """Return 'section[+offset]' for addr using objdump, or ''."""
        ret = ''
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not reladdr or not elf or not cmd:
            return ''
        iaddr = int(reladdr, 16)
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            for line in iter(objdump.stdout.readline, ''):
                header = self._parse_section_header(line)
                if header is None:
                    continue
                name, ivma, isize = header
                if ivma == iaddr:
                    ret = name
                    break
                if ivma < iaddr <= ivma + isize:
                    ret = name + '+' + str(iaddr - ivma)
                    break
        finally:
            self._reap(objdump)
        return ret

    def process_abort(self, line):
        """Return the abort line with symbol and section info added.

        The information is inserted right after the abort address. '' is
        returned when nothing could be found out about the address.
        """
        match = ABORT_ADDR_RE.search(line)
        if match is None:
            return ''
        addr = match.group('addr')
        sym = self.symbol_plus_offset(addr)
        sec = self.section_plus_offset(addr)
        if not sym and not sec:
            return ''
        parts = [line[:match.start('addr')], addr]
        if sym:
            parts.append(' ' + sym)
        if sec:
            parts.append(' ' + sec)
        parts.append(line[match.end('addr'):])
        return ''.join(parts)

    # Return all ELF sections with the ALLOC flag
    def read_sections(self, elf_name):
        """Cache the ALLOC sections of elf_name in self._sections.

        Nothing is cached if the ELF file or objdump cannot be located.
        """
        if elf_name is None:
            return
        if elf_name in self._sections:
            return
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not elf or not cmd:
            return
        sections = []
        self._sections[elf_name] = sections
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            # Each section is described by a header line immediately
            # followed by a line listing its flags
            pending = None
            for line in iter(objdump.stdout.readline, ''):
                header = self._parse_section_header(line)
                if header is not None:
                    pending = header
                    continue
                if pending is not None and 'ALLOC' in line:
                    sections.append(list(pending))
                pending = None
        finally:
            self._reap(objdump)

    def first_vma(self, elf_name):
        """Return the VMA of the first ALLOC section as a hex string.

        Falls back to '0x0' when no section information is available.
        """
        self.read_sections(elf_name)
        sections = self._sections.get(elf_name)
        if not sections:
            return '0x0'
        return '0x{:x}'.format(sections[0][1])

    def overlaps(self, section, addr, size):
        """Tell if [addr, addr + size) intersects section [name, addr, size].

        Empty ranges never overlap anything.
        """
        sec_addr = section[1]
        sec_size = section[2]
        if not size or not sec_size:
            return False
        return ((addr <= (sec_addr + sec_size - 1)) and
                ((addr + size - 1) >= sec_addr))

    def sections_in_region(self, addr, size, elf_idx):
        """Return the names of the ELF sections found in a memory region.

        addr, size: region start and size (hex strings).
        elf_idx: index in the ELF list of the file mapped in the region.
        Each name is preceded by a space; '' means nothing was found.
        """
        addr = self.subtract_load_addr(addr)
        if not addr:
            return ''
        iaddr = int(addr, 16)
        isize = int(size, 16)
        entry = self._elfs.get(int(elf_idx))
        if entry is None or entry[0] is None:
            return ''
        elf = entry[0]
        self.read_sections(elf)
        if elf not in self._sections:
            return ''
        return ''.join(' ' + s[0] for s in self._sections[elf]
                       if self.overlaps(s, iaddr, isize))

    def reset(self):
        """Forget everything learnt from the dump processed so far."""
        self._call_stack_found = False
        if self._addr2line:
            self._reap(self._addr2line)
            self._addr2line = None
        self._addr2line_elf_name = None
        self._arch = None
        self._saved_abort_line = ''
        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
        # Load addresses are hex strings everywhere, including this default
        self._elfs = {0: ["tee.elf", '0x0']}  # {idx: [uuid, load_addr], ...}
        self._tee_load_addr = '0x0'
        self._func_graph_found = False
        self._func_graph_skip_line = True

    def pretty_print_path(self, path):
        """Return path with every occurrence of the strip prefix removed."""
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path

    def _dump_memory_map(self):
        """Print the saved region table and ELF list with extra details."""
        for r_addr, r_size, elf_idx, saved_line in self._regions:
            if elf_idx is None:
                self._out.write(saved_line)
            else:
                self._out.write(saved_line.strip() +
                                self.sections_in_region(r_addr, r_size,
                                                        elf_idx) +
                                '\n')
        for e in self._elfs.values():
            if len(e) < 3:
                # Default entry, it has no saved line
                continue
            # TA executable or library
            self._out.write(e[2].strip())
            elf = self.get_elf(e[0])
            if elf:
                rpath = os.path.realpath(elf)
                self._out.write(' (' + self.pretty_print_path(rpath) + ')')
            self._out.write('\n')

    def write(self, line):
        """Process one line of log and send the result to the output."""
        if self._call_stack_found:
            match = STACK_ADDR_RE.search(line)
            if match:
                post = match.end('addr')
                self._out.write(line[:post])
                res = self.resolve(match.group('addr'))
                self._out.write(' ' + self.pretty_print_path(res))
                self._out.write(line[post:])
                return
            # End of call stack
            self.reset()
        if self._func_graph_found:
            match = GRAPH_ADDR_RE.search(line)
            if match:
                # Replace the address with the name of the function
                self._out.write(line[:match.start('addr')])
                res = self.resolve(match.group('addr'))
                self._out.write(res.split(' ')[0])
                self._out.write(line[match.end('addr'):])
                self._func_graph_skip_line = False
                return
            if GRAPH_RE.search(line):
                self._out.write(line)
                return
            if self._func_graph_skip_line:
                # Nothing is printed until the first address is seen
                return
            # End of function graph
            self.reset()
        match = REGION_RE.search(line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            self._regions.append([match.group('addr'), match.group('size'),
                                  match.group('elf_idx'), line])
            return
        match = ELF_LIST_RE.search(line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            self._elfs[int(match.group('idx'))] = [match.group('uuid'),
                                                   match.group('load_addr'),
                                                   line]
            return
        match = TEE_LOAD_ADDR_RE.search(line)
        if match:
            self._tee_load_addr = match.group('load_addr')
        if CALL_STACK_RE.search(line):
            self._call_stack_found = True
            self._dump_memory_map()
            # Here is a good place to resolve the abort address because we
            # have all the information we need
            if self._saved_abort_line:
                self._out.write(self.process_abort(self._saved_abort_line))
        if FUNC_GRAPH_RE.search(line):
            self._func_graph_found = True
        if ABORT_ADDR_RE.search(line):
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so We can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)

    def flush(self):
        """Flush the output stream."""
        self._out.flush()
486
487
def main():
    """Symbolize the dump read from stdin and print it to stdout.

    When stdin is a terminal, echo is switched off while reading so that
    the pasted dump does not get mixed with the output; the terminal
    settings are restored on exit.
    """
    args = get_args()
    # -d may be used several times *and* with multiple arguments, which
    # yields a list of lists: flatten it
    search_paths = []
    for group in args.dir or []:
        search_paths.extend(group)
    args.dirs = search_paths
    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)

    stdin_fd = sys.stdin.fileno()
    interactive = os.isatty(stdin_fd)
    if interactive:
        saved_attrs = termios.tcgetattr(stdin_fd)
        quiet_attrs = termios.tcgetattr(stdin_fd)
        quiet_attrs[3] &= ~termios.ECHO  # lflags
    try:
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, quiet_attrs)
        for line in sys.stdin:
            symbolizer.write(line)
    finally:
        symbolizer.flush()
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, saved_attrs)


if __name__ == "__main__":
    main()
517