xref: /optee_os/scripts/symbolize.py (revision d7c22ace31de10d440cd5fe76d976a03ea9b96a4)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import errno
10import glob
11import os
12import re
13import subprocess
14import sys
15import termios
16
# Marks the line that introduces a call stack dump
CALL_STACK_RE = re.compile('Call stack:')
# Captures the load address announced for the TEE core
TEE_LOAD_ADDR_RE = re.compile(r'TEE load address @ (?P<load_addr>0x[0-9a-f]+)')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(
    r'[UEIDFM]/(TC|LD):(\?*|[0-9]*) [0-9]* +(?P<addr>0x[0-9a-f]+)')
# Captures the faulting address from an abort line such as:
# E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# Matches one row of the region table: virtual address, size and, when the
# flags are followed by a bracketed number, the index of the mapped ELF
REGION_RE = re.compile(r'region +[0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{4} (\[(?P<elf_idx>[0-9]+)\])?)?')
# Matches one row of the ELF list: bracketed index, UUID and load address
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f\-]+)')
# Marks the line that introduces a function graph
FUNC_GRAPH_RE = re.compile(r'Function graph')
# Any 0x-prefixed lower-case hexadecimal number; used on function graph lines
GRAPH_ADDR_RE = re.compile(r'(?P<addr>0x[0-9a-f]+)')
# A closing brace; used to recognize function graph lines without an address
GRAPH_RE = re.compile(r'}')

# Free-form text handed to argparse as the epilog of the --help output
epilog = '''
This scripts reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info. If the CROSS_COMPILE environment
variable is set, it is used as a prefix to the binutils tools. That is, the
script will invoke $(CROSS_COMPILE)addr2line etc. If it is not set however,
the prefix will be determined automatically for each ELF file based on its
architecture (arm-linux-gnueabihf-, aarch64-linux-gnu-). The resulting command
is then expected to be found in the user's PATH.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D

Also, this script reads function graph generated for OP-TEE user TA from
/tmp/ftrace-<ta_uuid>.out file and resolves function addresses to corresponding
symbols.

Sample usage:

  $ cat /tmp/ftrace-<ta_uuid>.out | scripts/symbolize.py -d <ta_uuid>.elf
  <paste function graph here>
  ^D
'''
77
78
def get_args():
    """Parse the command line and return the resulting argparse namespace.

    The namespace carries 'dir' (a list of lists, one inner list per -d
    occurrence, or None when -d is absent) and 'strip_path' (the current
    directory by default, None when -s is given without a value).
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed to decode a '
                'TEE Core or pseudo-TA abort, while <TA_uuid>.elf is '
                'required if a user-mode TA has crashed. For convenience, '
                'ELF files may also be given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: current '
                  'directory, use -s with no argument to show full paths)')

    # The raw formatter keeps the layout of the multi-line epilog intact
    cli = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps or function graphs',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument('-d', '--dir', nargs='+', action='append',
                     help=dir_help)
    cli.add_argument('-s', '--strip_path', nargs='?', default=os.getcwd(),
                     help=strip_help)

    return cli.parse_args()
96
97
class Symbolizer(object):
    """Add symbol information to OP-TEE dumps and function graphs.

    Console output is pushed through write() one line at a time. Region
    table and ELF list lines are held back and printed, annotated, once the
    'Call stack:' line is reached. Addresses of the call stack (or of a
    function graph) are resolved with addr2line; the abort address is
    described with the help of nm and objdump.
    """

    def __init__(self, out, dirs, strip_path):
        """Create a symbolizer.

        out: file-like object receiving the annotated text (write/flush)
        dirs: list of directories, or ELF file paths, searched by get_elf()
        strip_path: prefix removed from the paths that get printed; a falsy
                    value leaves the paths untouched
        """
        self._out = out
        self._dirs = dirs
        self._strip_path = strip_path
        self._addr2line = None
        self.reset()

    def my_Popen(self, cmd):
        """Start cmd (an argv list) with text-mode pipes on stdin/stdout.

        Returns the subprocess.Popen object. When the executable does not
        exist an error is printed to stderr and the script exits with
        status 1; any other OSError is propagated to the caller.
        """
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True,
                                    bufsize=1)
        except OSError as e:
            if e.errno != errno.ENOENT:
                # Not a "command not found" condition: returning None here
                # would only move the failure to the first use of the pipe
                raise
            print("*** Error:{}: command not found".format(cmd[0]),
                  file=sys.stderr)
            sys.exit(1)

    @staticmethod
    def _stop_process(proc):
        """Terminate proc, close its pipes and wait for it to exit."""
        if proc is None:
            return
        proc.terminate()
        for pipe in (proc.stdin, proc.stdout):
            if pipe is None:
                continue
            try:
                pipe.close()
            except OSError:
                # Flushing into a process that is already gone may fail;
                # there is nothing left to save at this point
                pass
        proc.wait()

    def get_elf(self, elf_or_uuid):
        """Return the path of the ELF file named elf_or_uuid, or None.

        '.elf' is appended to the name when missing. Each entry of the
        search list is accepted either as the ELF file itself or as a
        directory containing it.
        """
        if not elf_or_uuid.endswith('.elf'):
            elf_or_uuid += '.elf'
        for d in self._dirs:
            if d.endswith(elf_or_uuid) and os.path.isfile(d):
                return d
            elf = glob.glob(d + '/' + elf_or_uuid)
            if elf:
                return elf[0]
        return None

    def set_arch(self, elf):
        """Set self._arch to the binutils prefix to use for elf.

        CROSS_COMPILE wins when it is set and not empty. Otherwise the
        prefix is derived from the first line printed by 'file -L'; when
        that line is not recognized self._arch keeps the value read from
        the environment.
        """
        self._arch = os.getenv('CROSS_COMPILE')
        if self._arch:
            return
        p = subprocess.Popen(['file', '-L', elf], stdout=subprocess.PIPE)
        # communicate() reads everything, closes the pipe and reaps the
        # child
        output = p.communicate()[0].splitlines()
        if not output:
            return
        if b'ARM aarch64,' in output[0]:
            self._arch = 'aarch64-linux-gnu-'
        elif b'ARM,' in output[0]:
            self._arch = 'arm-linux-gnueabihf-'

    def arch_prefix(self, cmd, elf):
        """Return cmd prefixed for the architecture of elf.

        An empty string is returned when no prefix could be determined.
        """
        self.set_arch(elf)
        if self._arch is None:
            return ''
        return self._arch + cmd

    def spawn_addr2line(self, elf_name):
        """Make sure self._addr2line is an addr2line process for elf_name.

        A process already serving elf_name is reused. Otherwise the previous
        process is stopped and a new one is started; self._addr2line is left
        to None if the ELF file or the tool prefix cannot be determined.
        """
        if elf_name is None:
            return
        # Compare by value: equal names are not always the same object
        if self._addr2line_elf_name == elf_name:
            return
        if self._addr2line:
            self._stop_process(self._addr2line)
            self._addr2line = None
        # The name must always describe the running process, so that a
        # failure below cannot be mistaken for a live process later on
        self._addr2line_elf_name = None
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('addr2line', elf)
        if not cmd:
            return
        self._addr2line = self.my_Popen([cmd, '-f', '-p', '-e', elf])
        self._addr2line_elf_name = elf_name

    # If addr falls into a region that maps a TA ELF file, return the load
    # address of that file.
    def elf_load_addr(self, addr):
        """Return the load address (hex string) of the ELF containing addr.

        None is returned when addr belongs to a region tagged with an ELF
        index that is missing from the ELF list.
        """
        if self._regions:
            i_addr = int(addr, 16)
            for r in self._regions:
                r_addr = int(r[0], 16)
                r_size = int(r[1], 16)
                if r_addr <= i_addr < r_addr + r_size:
                    # Found region
                    elf_idx = r[2]
                    if elf_idx is not None:
                        entry = self._elfs.get(int(elf_idx))
                        return entry[1] if entry else None
        # In case address is not found in TA ELF file, fallback to tee.elf
        # especially to symbolize mixed (user-space and kernel) addresses
        # which is true when syscall ftrace is enabled along with TA
        # ftrace.
        return self._tee_load_addr

    def elf_for_addr(self, addr):
        """Return the name of the ELF file addr belongs to, or None."""
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if l_addr == self._tee_load_addr:
            return 'tee.elf'
        i_l_addr = int(l_addr, 16)
        for e in self._elfs.values():
            if int(e[1], 16) == i_l_addr:
                return e[0]
        return None

    def subtract_load_addr(self, addr):
        """Return addr relative to the load address of its ELF file.

        The result is a hex string, None when the load address is unknown
        and '' when the load address is above addr.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if int(l_addr, 16) > int(addr, 16):
            return ''
        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))

    def resolve(self, addr):
        """Return the text addr2line prints for addr.

        '???' is returned when the address cannot be looked up and '!!!'
        when talking to addr2line fails.
        """
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        self.spawn_addr2line(elf_name)
        if not reladdr or not self._addr2line:
            return '???'
        if elf_name == 'tee.elf':
            # tee.elf is queried with addresses counted from the VMA of its
            # first ALLOC section
            base = self.first_vma('tee.elf')
            if base is None:
                return '???'
            reladdr = '0x{:x}'.format(int(reladdr, 16) + int(base, 16))
        try:
            print(reladdr, file=self._addr2line.stdin)
            ret = self._addr2line.stdout.readline().rstrip('\n')
        except IOError:
            ret = '!!!'
        return ret

    @staticmethod
    def _symbol_from_nm(nm_lines, ireladdr):
        """Find ireladdr in the output of 'nm --numeric-sort --print-size'.

        Returns 'symbol', 'symbol+offset' (offset in decimal) or ''. The end
        of a symbol is inclusive. An address located after a symbol without
        a size and before the next symbol is reported relative to the
        former.
        """
        prev_name = None
        prev_addr = 0
        prev_size = 0
        for line in nm_lines:
            fields = line.split()
            if len(fields) == 4:
                sym_addr, size, _, name = fields
            elif len(fields) == 3:
                # Size is missing
                sym_addr, _, name = fields
                size = '0'
            else:
                # E.g., undefined (external) symbols (line = "U symbol")
                continue
            iaddr = int(sym_addr, 16)
            isize = int(size, 16)
            if iaddr == ireladdr:
                return name
            if iaddr < ireladdr <= iaddr + isize:
                return name + '+' + str(ireladdr - iaddr)
            if iaddr > ireladdr:
                # Symbols are sorted: nothing further down can match
                if prev_name is not None and prev_size == 0:
                    return prev_name + '+' + str(ireladdr - prev_addr)
                return ''
            prev_name = name
            prev_addr = iaddr
            prev_size = isize
        return ''

    def symbol_plus_offset(self, addr):
        """Return 'symbol[+offset]' for addr, or '' when nothing is found."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        # The ELF must be known before arch_prefix() is called, because the
        # architecture detection runs 'file' on that path
        if not reladdr or not elf:
            return ''
        cmd = self.arch_prefix('nm', elf)
        if not cmd:
            return ''
        nm = self.my_Popen([cmd, '--numeric-sort', '--print-size', elf])
        try:
            return self._symbol_from_nm(iter(nm.stdout.readline, ''),
                                        int(reladdr, 16))
        finally:
            self._stop_process(nm)

    def section_plus_offset(self, addr):
        """Return 'section[+offset]' for addr, or '' when nothing is found.

        The offset is printed in decimal and the end of a section is
        inclusive.
        """
        ret = ''
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        # See symbol_plus_offset(): check the ELF before arch_prefix()
        if not reladdr or not elf:
            return ''
        cmd = self.arch_prefix('objdump', elf)
        if not cmd:
            return ''
        iaddr = int(reladdr, 16)
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            for line in iter(objdump.stdout.readline, ''):
                fields = line.split()
                # A section row is: idx name size vma lma offset alignment
                if len(fields) != 7 or not fields[0].isdigit():
                    continue
                name = fields[1]
                isize = int(fields[2], 16)
                ivma = int(fields[3], 16)
                if ivma == iaddr:
                    ret = name
                    break
                if ivma < iaddr and ivma + isize >= iaddr:
                    ret = name + '+' + str(iaddr - ivma)
                    break
        finally:
            self._stop_process(objdump)
        return ret

    def process_abort(self, line):
        """Return the abort line with symbol and section after the address.

        An empty string is returned when neither a symbol nor a section is
        found, or when line holds no abort address.
        """
        match = ABORT_ADDR_RE.search(line)
        if not match:
            return ''
        addr = match.group('addr')
        sym = self.symbol_plus_offset(addr)
        sec = self.section_plus_offset(addr)
        if not sym and not sec:
            return ''
        ret = line[:match.start('addr')] + addr
        if sym:
            ret += ' ' + sym
        if sec:
            ret += ' ' + sec
        return ret + line[match.end('addr'):]

    # Return all ELF sections with the ALLOC flag
    def read_sections(self, elf_name):
        """Cache the ALLOC sections of elf_name in self._sections.

        Nothing is stored when the ELF file or the tool prefix cannot be
        determined. The outcome of a successful run is reused.
        """
        if elf_name is None:
            return
        if elf_name in self._sections:
            return
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('objdump', elf)
        if not cmd:
            return
        sections = self._sections[elf_name] = []
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            # objdump prints a section on two lines: the row holding name,
            # size and VMA, then the list of flags
            pending = None
            for line in iter(objdump.stdout.readline, ''):
                fields = line.split()
                if len(fields) == 7 and fields[0].isdigit():
                    pending = [fields[1], int(fields[3], 16),
                               int(fields[2], 16)]
                elif pending is not None and 'ALLOC' in line:
                    sections.append(pending)
                    pending = None
        finally:
            self._stop_process(objdump)

    def first_vma(self, elf_name):
        """Return the VMA of the first ALLOC section of elf_name.

        The value is a hex string; None is returned when the sections of
        the file are not available.
        """
        self.read_sections(elf_name)
        sections = self._sections.get(elf_name)
        if not sections:
            return None
        return '0x{:x}'.format(sections[0][1])

    def overlaps(self, section, addr, size):
        """Tell whether [addr, addr + size) intersects section.

        section is a [name, addr, size] list; an empty range or an empty
        section never overlaps.
        """
        sec_addr = section[1]
        sec_size = section[2]
        if not size or not sec_size:
            return False
        return ((addr <= (sec_addr + sec_size - 1)) and
                ((addr + size - 1) >= sec_addr))

    def sections_in_region(self, addr, size, elf_idx):
        """Return the names of the sections found in a region.

        addr and size are hex strings taken from the region table, elf_idx
        is the ELF index of the region. Each name is preceded by a space;
        '' is returned when the information is not available.
        """
        ret = ''
        addr = self.subtract_load_addr(addr)
        if not addr:
            return ''
        iaddr = int(addr, 16)
        isize = int(size, 16)
        entry = self._elfs.get(int(elf_idx))
        if entry is None or entry[0] is None:
            return ''
        elf = entry[0]
        self.read_sections(elf)
        if elf not in self._sections:
            return ''
        for s in self._sections[elf]:
            if self.overlaps(s, iaddr, isize):
                ret += ' ' + s[0]
        return ret

    def reset(self):
        """Forget everything learnt from the dump processed so far."""
        self._call_stack_found = False
        if self._addr2line:
            self._stop_process(self._addr2line)
            self._addr2line = None
        self._addr2line_elf_name = None
        self._arch = None
        self._saved_abort_line = ''
        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
        # {idx: [uuid, load_addr], ...}; load addresses are hex strings,
        # as they are always parsed with int(x, 16)
        self._elfs = {0: ["tee.elf", '0x0']}
        self._tee_load_addr = '0x0'
        self._func_graph_found = False
        self._func_graph_skip_line = True

    def pretty_print_path(self, path):
        """Return path with every occurrence of the strip prefix removed."""
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path

    def _write_saved_tables(self):
        """Print the region table and the ELF list that were held back."""
        for r_addr, r_size, elf_idx, saved_line in self._regions:
            if elf_idx is None:
                self._out.write(saved_line)
            else:
                self._out.write(saved_line.strip() +
                                self.sections_in_region(r_addr, r_size,
                                                        elf_idx) +
                                '\n')
        for e in self._elfs.values():
            if len(e) >= 3:
                # TA executable or library
                self._out.write(e[2].strip())
                elf = self.get_elf(e[0])
                if elf:
                    rpath = os.path.realpath(elf)
                    self._out.write(' (' + self.pretty_print_path(rpath) +
                                    ')')
                self._out.write('\n')

    def write(self, line):
        """Process one line of input and print the matching output."""
        if self._call_stack_found:
            match = STACK_ADDR_RE.search(line)
            if match:
                addr = match.group('addr')
                self._out.write(line[:match.start('addr')])
                self._out.write(addr)
                res = self.pretty_print_path(self.resolve(addr))
                self._out.write(' ' + res)
                self._out.write(line[match.end('addr'):])
                return
            # First line that is not a stack entry: the dump is over
            self.reset()
        if self._func_graph_found:
            match = GRAPH_ADDR_RE.search(line)
            if match:
                self._out.write(line[:match.start('addr')])
                # The address is replaced by the first word of the
                # resolved text
                res = self.resolve(match.group('addr'))
                self._out.write(res.split(' ')[0])
                self._out.write(line[match.end('addr'):])
                self._func_graph_skip_line = False
                return
            if GRAPH_RE.search(line):
                self._out.write(line)
                return
            if self._func_graph_skip_line:
                return
            self.reset()
        match = REGION_RE.search(line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            self._regions.append([match.group('addr'), match.group('size'),
                                  match.group('elf_idx'), line])
            return
        match = ELF_LIST_RE.search(line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            self._elfs[int(match.group('idx'))] = [match.group('uuid'),
                                                   match.group('load_addr'),
                                                   line]
            return
        match = TEE_LOAD_ADDR_RE.search(line)
        if match:
            self._tee_load_addr = match.group('load_addr')
        if CALL_STACK_RE.search(line):
            self._call_stack_found = True
            self._write_saved_tables()
            # Here is a good place to resolve the abort address because we
            # have all the information we need
            if self._saved_abort_line:
                self._out.write(self.process_abort(self._saved_abort_line))
        if FUNC_GRAPH_RE.search(line):
            self._func_graph_found = True
        if ABORT_ADDR_RE.search(line):
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so We can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)

    def flush(self):
        """Flush the output stream."""
        self._out.flush()
483
def main():
    """Symbolize stdin to stdout, with terminal echo off while reading."""
    options = get_args()
    # -d may be used several times *and* with multiple arguments, so the
    # parser hands back a list of lists: flatten it
    search_dirs = []
    for group in options.dir or []:
        search_dirs.extend(group)
    symbolizer = Symbolizer(sys.stdout, search_dirs, options.strip_path)

    stdin_fd = sys.stdin.fileno()
    interactive = os.isatty(stdin_fd)
    if interactive:
        saved_attrs = termios.tcgetattr(stdin_fd)
        quiet_attrs = termios.tcgetattr(stdin_fd)
        quiet_attrs[3] &= ~termios.ECHO  # lflags
    try:
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, quiet_attrs)
        for text in sys.stdin:
            symbolizer.write(text)
    finally:
        # Runs on errors too, so the terminal never stays without echo
        symbolizer.flush()
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, saved_attrs)


if __name__ == "__main__":
    main()
513