xref: /optee_os/scripts/symbolize.py (revision 43be6453dd3e98d39721c8bc6725416772f4205c)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import errno
10import glob
11import os
12import re
13import subprocess
14import sys
15import termios
16
# Marks the beginning of a call stack dump
CALL_STACK_RE = re.compile('Call stack:')
# Load address of the TEE core, used to relocate tee.elf addresses
TEE_LOAD_ADDR_RE = re.compile(r'TEE load address @ (?P<load_addr>0x[0-9a-f]+)')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(
    r'[UEIDFM]/(TC|LD):(\?*|[0-9]*) [0-9]* +(?P<addr>0x[0-9a-f]+)')
# Faulting address of a data-abort or prefetch-abort message
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One entry of the memory region table. The trailing '[<idx>]', when
# present, is the index of the ELF file mapped by the region.
REGION_RE = re.compile(r'region +[0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{4} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One entry of the ELF list: '[<idx>] <uuid> @ <load address>'.
# The load address is plain hexadecimal: unlike the UUID it must not
# accept '-', otherwise int(load_addr, 16) could fail later on.
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f]+)')
# Marks the beginning of a function graph (ftrace) dump
FUNC_GRAPH_RE = re.compile(r'Function graph')
# Any hexadecimal address inside a function graph line
GRAPH_ADDR_RE = re.compile(r'(?P<addr>0x[0-9a-f]+)')
# Closing brace of a function graph scope; such lines are copied as is
GRAPH_RE = re.compile(r'}')
32
# Text displayed after the option list in the --help output
epilog = '''
This script reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info. If the CROSS_COMPILE environment
variable is set, it is used as a prefix to the binutils tools. That is, the
script will invoke $(CROSS_COMPILE)addr2line etc. If it is not set however,
the prefix will be determined automatically for each ELF file based on its
architecture (arm-linux-gnueabihf-, aarch64-linux-gnu-). The resulting command
is then expected to be found in the user's PATH.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D

Also, this script reads function graph generated for OP-TEE user TA from
/tmp/ftrace-<ta_uuid>.out file and resolves function addresses to corresponding
symbols.

Sample usage:

  $ cat /tmp/ftrace-<ta_uuid>.out | scripts/symbolize.py -d <ta_uuid>.elf
  <paste function graph here>
  ^D
'''
77
78
def get_args():
    """Parse the command line and return the argparse namespace.

    The namespace has two attributes: 'dir', a list of lists of paths
    (one inner list per -d occurrence) or None when -d is not used, and
    'strip_path', which defaults to the current directory and is None
    when -s is given without a value.
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed '
                'to decode a TEE Core or pseudo-TA abort, while '
                '<TA_uuid>.elf is required if a user-mode TA has '
                'crashed. For convenience, ELF files may also be '
                'given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: '
                  'current directory, use -s with no argument to show '
                  'full paths)')

    # The raw formatter keeps the line breaks of the epilog text
    parser = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps or function graphs',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-d', '--dir', action='append', nargs='+',
                        help=dir_help)
    parser.add_argument('-s', '--strip_path', nargs='?',
                        default=os.getcwd(), help=strip_help)

    parsed = parser.parse_args()
    return parsed
96
97
class Symbolizer(object):
    """Stream filter that adds debug information to OP-TEE dumps.

    Lines of a secure console log are passed one by one to write().
    They are copied to the output stream, and the addresses found in
    call stacks, abort messages and function graphs are annotated with
    the output of the binutils tools (addr2line, nm, objdump) run on
    the ELF files found in the search paths.
    """

    def __init__(self, out, dirs, strip_path):
        """Create a symbolizer.

        out: file-like object that receives the annotated output.
        dirs: list of directories (or ELF file paths) to search.
        strip_path: prefix removed from the resolved file paths; a
            false value keeps the paths unchanged.
        """
        self._out = out
        self._dirs = dirs
        self._strip_path = strip_path
        self._addr2line = None
        self.reset()

    def my_Popen(self, cmd):
        """Start cmd with line-buffered text pipes on stdin and stdout.

        cmd is an argument list whose first item is the program name.
        Returns the subprocess.Popen object. If the program cannot be
        started, an error is printed to stderr and the script exits
        with status 1.
        """
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE, text=True,
                                    bufsize=1)
        except OSError as e:
            if e.errno == errno.ENOENT:
                print("*** Error:{}: command not found".format(cmd[0]),
                      file=sys.stderr)
            else:
                # Callers expect a process object: report any other
                # failure instead of silently returning None
                print("*** Error:{}: {}".format(cmd[0], e.strerror),
                      file=sys.stderr)
            sys.exit(1)

    def get_elf(self, elf_or_uuid):
        """Return the path of an ELF file, or None if it is not found.

        elf_or_uuid is a file name or a TA UUID; '.elf' is appended if
        missing. Each search path is either an ELF file whose path ends
        with that name, or a directory containing it.
        """
        if not elf_or_uuid.endswith('.elf'):
            elf_or_uuid += '.elf'
        for d in self._dirs:
            if d.endswith(elf_or_uuid) and os.path.isfile(d):
                return d
            elf = glob.glob(d + '/' + elf_or_uuid)
            if elf:
                return elf[0]
        return None

    def set_arch(self):
        """Determine the prefix of the binutils commands.

        The CROSS_COMPILE environment variable is used when set.
        Otherwise the prefix is derived from the output of the 'file'
        command run on the ELF file with index 0. self._arch is left
        unset when the architecture cannot be determined.
        """
        if self._arch:
            return
        self._arch = os.getenv('CROSS_COMPILE')
        if self._arch:
            return
        elf = self.get_elf(self._elfs[0][0])
        if elf is None:
            return
        p = self.my_Popen(['file', elf])
        # communicate() reads the whole output, closes the pipes and
        # waits for the process
        output, _ = p.communicate()
        first_line = output.partition('\n')[0]
        if 'ARM aarch64,' in first_line:
            self._arch = 'aarch64-linux-gnu-'
        elif 'ARM,' in first_line:
            self._arch = 'arm-linux-gnueabihf-'

    def arch_prefix(self, cmd):
        """Return cmd with the toolchain prefix, or '' if unknown."""
        self.set_arch()
        if self._arch is None:
            return ''
        return self._arch + cmd

    def _stop_addr2line(self):
        """Stop the running addr2line process, if any."""
        proc = self._addr2line
        if proc:
            # Leaving the 'with' block closes the pipes and reaps the
            # terminated process
            with proc:
                proc.terminate()
        self._addr2line = None
        # The cached name must never outlive the process it describes
        self._addr2line_elf_name = None

    def spawn_addr2line(self, elf_name):
        """Make sure an addr2line process is running for elf_name.

        The running process is reused when it already serves elf_name.
        Otherwise it is stopped and a new one is started. No process
        is left running if the ELF file or the tool cannot be located.
        """
        if elf_name is None:
            return
        if self._addr2line_elf_name == elf_name:
            return
        self._stop_addr2line()
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('addr2line')
        if not cmd:
            return
        args = [cmd]
        if elf_name == 'tee.elf' and self._tee_load_addr != '0x0':
            args += ['-j.text']
        args += ['-f', '-p', '-e', elf]
        self._addr2line = self.my_Popen(args)
        self._addr2line_elf_name = elf_name

    def elf_load_addr(self, addr):
        """Return the load address of the ELF file that contains addr.

        If addr falls into a region that maps a TA ELF file, the load
        address of that file is returned. Otherwise (no region table,
        region not backed by a known ELF file) the TEE core load
        address is returned. Addresses are hexadecimal strings.
        """
        i_addr = int(addr, 16)
        for r in self._regions:
            r_addr = int(r[0], 16)
            r_size = int(r[1], 16)
            if r_addr <= i_addr < r_addr + r_size:
                # Found region
                elf_idx = r[2]
                if elf_idx is not None:
                    # Ignore regions that refer to an ELF index which
                    # was not listed in the dump
                    entry = self._elfs.get(int(elf_idx))
                    if entry is not None:
                        return entry[1]
        # In case address is not found in TA ELF file, fallback to tee.elf
        # especially to symbolize mixed (user-space and kernel) addresses
        # which is true when syscall ftrace is enabled along with TA
        # ftrace.
        return self._tee_load_addr

    def elf_for_addr(self, addr):
        """Return the name (or UUID) of the ELF file that contains addr.

        Returns None when no known ELF file has the load address found
        for addr.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr == self._tee_load_addr:
            return 'tee.elf'
        i_load = int(l_addr, 16)
        for e in self._elfs.values():
            if int(e[1], 16) == i_load:
                return e[0]
        return None

    def subtract_load_addr(self, addr):
        """Return addr relative to the load address of its ELF file.

        The result is a hexadecimal string, or '' if addr is lower than
        the load address.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if int(l_addr, 16) > int(addr, 16):
            return ''
        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))

    def resolve(self, addr):
        """Return the addr2line output for addr.

        Returns '???' when the address cannot be looked up and '!!!'
        when talking to addr2line fails.
        """
        reladdr = self.subtract_load_addr(addr)
        self.spawn_addr2line(self.elf_for_addr(addr))
        if not reladdr or not self._addr2line:
            return '???'
        try:
            print(reladdr, file=self._addr2line.stdin)
            ret = self._addr2line.stdout.readline().rstrip('\n')
        except IOError:
            ret = '!!!'
        return ret

    @staticmethod
    def _match_symbol(nm_lines, ireladdr):
        """Find the symbol that contains ireladdr in sorted nm output.

        nm_lines: lines printed by 'nm --numeric-sort --print-size'.
        ireladdr: address (int) relative to the start of the ELF file.
        Returns '<name>' or '<name>+<decimal offset>', or '' when no
        symbol matches.
        """
        prev_name = None
        prev_addr = 0
        prev_size = 0
        for line in nm_lines:
            fields = line.split()
            if len(fields) == 4:
                sym_addr, sym_size, _, name = fields
            elif len(fields) == 3:
                # Size is missing
                sym_addr, _, name = fields
                sym_size = '0'
            else:
                # E.g., undefined (external) symbols (line = "U symbol")
                continue
            try:
                iaddr = int(sym_addr, 16)
                isize = int(sym_size, 16)
            except ValueError:
                continue
            if iaddr == ireladdr:
                return name
            if iaddr < ireladdr <= iaddr + isize:
                return name + '+' + str(ireladdr - iaddr)
            if iaddr > ireladdr:
                # The symbols are sorted, so none of the following ones
                # can match. Attribute the address to the previous
                # symbol when its size is unknown.
                if prev_name is not None and prev_size == 0:
                    return prev_name + '+' + str(ireladdr - prev_addr)
                break
            prev_name = name
            prev_addr = iaddr
            prev_size = isize
        return ''

    def symbol_plus_offset(self, addr):
        """Return '<symbol>[+<offset>]' for addr, or '' if unknown."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if not reladdr or elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        if not elf:
            return ''
        cmd = self.arch_prefix('nm')
        if not cmd:
            return ''
        nm = self.my_Popen([cmd, '--numeric-sort', '--print-size', elf])
        with nm:
            try:
                return self._match_symbol(nm.stdout, int(reladdr, 16))
            finally:
                # The remaining output, if any, is not needed
                nm.terminate()

    @staticmethod
    def _match_section(objdump_lines, iaddr):
        """Find the section that contains iaddr.

        objdump_lines: lines printed by 'objdump --section-headers'.
        Returns '<name>' or '<name>+<decimal offset>', or '' when no
        section matches.
        """
        for line in objdump_lines:
            fields = line.split()
            # Section lines are: idx name size vma lma offset alignment
            if len(fields) != 7:
                continue
            name = fields[1]
            try:
                isize = int(fields[2], 16)
                ivma = int(fields[3], 16)
            except ValueError:
                # Not a section line after all
                continue
            if ivma == iaddr:
                return name
            if ivma < iaddr <= ivma + isize:
                return name + '+' + str(iaddr - ivma)
        return ''

    def section_plus_offset(self, addr):
        """Return '<section>[+<offset>]' for addr, or '' if unknown."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if not reladdr or elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        if not elf:
            return ''
        cmd = self.arch_prefix('objdump')
        if not cmd:
            return ''
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        with objdump:
            try:
                return self._match_section(objdump.stdout,
                                           int(reladdr, 16))
            finally:
                objdump.terminate()

    def process_abort(self, line):
        """Return the abort line with symbol and section info added.

        line must match ABORT_ADDR_RE. Returns '' when neither the
        symbol nor the section of the abort address is known.
        """
        match = ABORT_ADDR_RE.search(line)
        addr = match.group('addr')
        sym = self.symbol_plus_offset(addr)
        sec = self.section_plus_offset(addr)
        if not sym and not sec:
            return ''
        ret = line[:match.start('addr')] + addr
        if sym:
            ret += ' ' + sym
        if sec:
            ret += ' ' + sec
        return ret + line[match.end('addr'):]

    @staticmethod
    def _parse_alloc_sections(objdump_lines):
        """Return [[name, vma, size], ...] for sections flagged ALLOC.

        In the 'objdump --section-headers' output each section is
        described by a line with seven fields, followed by a line
        listing its flags.
        """
        sections = []
        current = None
        for line in objdump_lines:
            fields = line.split()
            if len(fields) == 7:
                try:
                    current = [fields[1], int(fields[3], 16),
                               int(fields[2], 16)]
                    continue
                except ValueError:
                    # A flags line that happens to have seven fields
                    pass
            if current is not None and 'ALLOC' in line:
                sections.append(list(current))
        return sections

    # Read all ELF sections with the ALLOC flag
    def read_sections(self, elf_name):
        """Cache the ALLOC sections of elf_name in self._sections.

        Nothing is done if the sections are already cached, or if the
        ELF file or the objdump tool cannot be located.
        """
        if elf_name is None or elf_name in self._sections:
            return
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('objdump')
        if not cmd:
            return
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        # Leaving the 'with' block closes the pipes and reaps objdump
        with objdump:
            self._sections[elf_name] = self._parse_alloc_sections(
                objdump.stdout)

    def overlaps(self, section, addr, size):
        """Tell if [addr, addr + size) intersects section.

        section is a [name, addr, size] list. Empty ranges never
        overlap.
        """
        sec_addr = section[1]
        sec_size = section[2]
        if not size or not sec_size:
            return False
        return ((addr <= (sec_addr + sec_size - 1)) and
                ((addr + size - 1) >= sec_addr))

    def sections_in_region(self, addr, size, elf_idx):
        """Return the names of the ELF sections mapped by a region.

        addr, size: region address and size (hexadecimal strings).
        elf_idx: index of the ELF file in the ELF list.
        Each name in the returned string is preceded by a space; the
        string is empty when nothing is known.
        """
        reladdr = self.subtract_load_addr(addr)
        if not reladdr:
            return ''
        iaddr = int(reladdr, 16)
        isize = int(size, 16)
        entry = self._elfs.get(int(elf_idx))
        if entry is None or entry[0] is None:
            return ''
        elf = entry[0]
        self.read_sections(elf)
        if elf not in self._sections:
            return ''
        return ''.join(' ' + s[0] for s in self._sections[elf]
                       if self.overlaps(s, iaddr, isize))

    def reset(self):
        """Forget everything learned from the current dump."""
        self._call_stack_found = False
        self._stop_addr2line()
        self._arch = None
        self._saved_abort_line = ''
        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
        # Load addresses are hexadecimal strings, like those taken from
        # the dump, so that int(load_addr, 16) always works
        self._elfs = {0: ["tee.elf", '0x0']}  # {idx: [uuid, load_addr], ...}
        self._tee_load_addr = '0x0'
        self._func_graph_found = False
        self._func_graph_skip_line = True

    def pretty_print_path(self, path):
        """Return path without the strip prefix, if one is set."""
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path

    def _write_stack_entry(self, line):
        """Annotate and write a call stack entry.

        Returns False, without writing anything, if line is not a call
        stack entry.
        """
        match = STACK_ADDR_RE.search(line)
        if not match:
            return False
        addr = match.group('addr')
        self._out.write(line[:match.start('addr')])
        self._out.write(addr)
        res = self.pretty_print_path(self.resolve(addr))
        self._out.write(' ' + res)
        self._out.write(line[match.end('addr'):])
        return True

    def _write_graph_entry(self, line):
        """Process one line of a function graph.

        Returns True when the line has been consumed, False when it
        shows that the function graph is over.
        """
        match = GRAPH_ADDR_RE.search(line)
        if match:
            addr = match.group('addr')
            self._out.write(line[:match.start('addr')])
            # Keep the function name only, not the file and line
            self._out.write(self.resolve(addr).split(' ')[0])
            self._out.write(line[match.end('addr'):])
            self._func_graph_skip_line = False
            return True
        if GRAPH_RE.search(line):
            self._out.write(line)
            return True
        # Lines seen before the first address are dropped
        return self._func_graph_skip_line

    def _write_saved_context(self):
        """Write the saved region table, ELF list and abort line."""
        for r_addr, r_size, elf_idx, saved_line in self._regions:
            if elf_idx is None:
                self._out.write(saved_line)
            else:
                self._out.write(saved_line.strip() +
                                self.sections_in_region(r_addr, r_size,
                                                        elf_idx) +
                                '\n')
        for e in self._elfs.values():
            if len(e) >= 3:
                # TA executable or library
                self._out.write(e[2].strip())
                elf = self.get_elf(e[0])
                if elf:
                    rpath = os.path.realpath(elf)
                    path = self.pretty_print_path(rpath)
                    self._out.write(' (' + path + ')')
                self._out.write('\n')
        # Here is a good place to resolve the abort address because we
        # have all the information we need
        if self._saved_abort_line:
            self._out.write(self.process_abort(self._saved_abort_line))

    def write(self, line):
        """Process one line of input and write the result to the output.

        Region table and ELF list lines are held back until the call
        stack is reached. Call stack and function graph entries are
        annotated. All other lines are copied unchanged.
        """
        if self._call_stack_found:
            if self._write_stack_entry(line):
                return
            # The call stack is over
            self.reset()
        if self._func_graph_found:
            if self._write_graph_entry(line):
                return
            # The function graph is over
            self.reset()
        match = REGION_RE.search(line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            self._regions.append([match.group('addr'),
                                  match.group('size'),
                                  match.group('elf_idx'), line])
            return
        match = ELF_LIST_RE.search(line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            i = int(match.group('idx'))
            self._elfs[i] = [match.group('uuid'), match.group('load_addr'),
                             line]
            return
        match = TEE_LOAD_ADDR_RE.search(line)
        if match:
            self._tee_load_addr = match.group('load_addr')
        if CALL_STACK_RE.search(line):
            self._call_stack_found = True
            self._write_saved_context()
        if FUNC_GRAPH_RE.search(line):
            self._func_graph_found = True
        if ABORT_ADDR_RE.search(line):
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so we can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)

    def flush(self):
        """Flush the output stream."""
        self._out.flush()
483
def main():
    """Symbolize the dump read from stdin and print it to stdout.

    When stdin is a terminal, echo is turned off while reading so that
    a pasted dump is not displayed twice; the terminal attributes are
    restored before returning.
    """
    args = get_args()
    # Flatten list in case -d is used several times *and* with multiple
    # arguments
    search_paths = []
    for group in args.dir or []:
        search_paths.extend(group)
    args.dirs = search_paths
    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)

    stdin_fd = sys.stdin.fileno()
    interactive = os.isatty(stdin_fd)
    if interactive:
        saved_attrs = termios.tcgetattr(stdin_fd)
        quiet_attrs = termios.tcgetattr(stdin_fd)
        quiet_attrs[3] &= ~termios.ECHO  # lflags
    try:
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, quiet_attrs)
        for input_line in sys.stdin:
            symbolizer.write(input_line)
    finally:
        symbolizer.flush()
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, saved_attrs)
509
510
# Run the symbolizer only when this file is executed as a script
if __name__ == "__main__":
    main()
513