xref: /optee_os/scripts/symbolize.py (revision c0c57c8fa58346f3b0837a02c313e447c91b74c9)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import errno
10import glob
11import os
12import re
13import subprocess
14import sys
15import termios
16
# Patterns used by Symbolizer.write() to recognise the relevant lines of a
# dump read from stdin.

# Marks the start of a call stack; the lines that follow it are matched
# against STACK_ADDR_RE.
CALL_STACK_RE = re.compile('Call stack:')
# Captures the load address announced for the TEE core (tee.elf).
TEE_LOAD_ADDR_RE = re.compile(r'TEE load address @ (?P<load_addr>0x[0-9a-f]+)')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(
    r'[UEIDFM]/(TC|LD):(\?*|[0-9]*) [0-9]* +(?P<addr>0x[0-9a-f]+)')
# Captures the faulting address of a '...-abort at address 0x...' line.
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One entry of the region table: virtual address, size and, optionally,
# the index (in square brackets) of the ELF file mapped by the region.
REGION_RE = re.compile(r'region +[0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{4} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One entry of the ELF list: '[<idx>] <uuid> @ <load address>'.
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f\-]+)')
# Marks the start of a function graph (ftrace output).
FUNC_GRAPH_RE = re.compile(r'Function graph')
# Any hexadecimal address found on a function graph line.
GRAPH_ADDR_RE = re.compile(r'(?P<addr>0x[0-9a-f]+)')
# A closing brace on a function graph line; such lines are passed through.
GRAPH_RE = re.compile(r'}')

# Long help text handed to argparse as the epilog in get_args(); it is shown
# verbatim after the option list.
epilog = '''
This scripts reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info. If the CROSS_COMPILE environment
variable is set, it is used as a prefix to the binutils tools. That is, the
script will invoke $(CROSS_COMPILE)addr2line etc. If it is not set however,
the prefix will be determined automatically for each ELF file based on its
architecture (arm-linux-gnueabihf-, aarch64-linux-gnu-). The resulting command
is then expected to be found in the user's PATH.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D

Also, this script reads function graph generated for OP-TEE user TA from
/tmp/ftrace-<ta_uuid>.out file and resolves function addresses to corresponding
symbols.

Sample usage:

  $ cat /tmp/ftrace-<ta_uuid>.out | scripts/symbolize.py -d <ta_uuid>.elf
  <paste function graph here>
  ^D
'''
77
78
def get_args():
    """Parse the command line and return the argparse namespace.

    Two options are defined: -d/--dir, which may be repeated and takes one
    or more values each time (so the parsed value is a list of lists, or
    None when the option is absent), and -s/--strip_path, which defaults to
    the current working directory and becomes None when given without a
    value.
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed '
                'to decode a TEE Core or pseudo-TA abort, while '
                '<TA_uuid>.elf is required if a user-mode TA has '
                'crashed. For convenience, ELF files may also be '
                'given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: '
                  'current directory, use -s with no argument to show '
                  'full paths)')

    parser = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps or function graphs',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-d', '--dir', nargs='+', action='append',
                        help=dir_help)
    parser.add_argument('-s', '--strip_path', nargs='?',
                        default=os.getcwd(), help=strip_help)
    return parser.parse_args()

96
97
class Symbolizer(object):
    """Line filter that adds symbol information to OP-TEE dumps.

    Lines are fed one at a time to write(). Region table and ELF list
    entries are held back until the 'Call stack:' line is seen, at which
    point they are printed (annotated) followed by the call stack, whose
    addresses are resolved with addr2line. Abort addresses are resolved
    with nm and objdump. Function graphs are handled similarly.

    Addresses are handled as hexadecimal strings ('0x...') throughout.
    """

    def __init__(self, out, dirs, strip_path):
        """Create a symbolizer.

        out        -- file-like object receiving the annotated output
        dirs       -- directories (or ELF file paths) searched by get_elf()
        strip_path -- prefix removed from resolved paths, or None/'' to
                      keep full paths
        """
        self._out = out
        self._dirs = dirs
        self._strip_path = strip_path
        # Must exist before reset() because reset() stops any running
        # addr2line process
        self._addr2line = None
        self.reset()

    def my_Popen(self, cmd):
        """Start cmd with line-buffered text pipes on stdin and stdout.

        Exits the script with an error message if the command does not
        exist. Any other OSError is propagated to the caller instead of
        silently returning None.
        """
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE, text=True,
                                    bufsize=1)
        except OSError as e:
            if e.errno == errno.ENOENT:
                print("*** Error:{}: command not found".format(cmd[0]),
                      file=sys.stderr)
                sys.exit(1)
            raise

    @staticmethod
    def _reap(proc):
        """Terminate a child process, close its pipes and wait for it."""
        try:
            proc.terminate()
        except OSError:
            # The process is already gone
            pass
        for pipe in (proc.stdin, proc.stdout):
            if pipe is None:
                continue
            try:
                pipe.close()
            except OSError:
                # E.g. broken pipe while flushing to a dead process; there
                # is nothing left to recover at this point
                pass
        proc.wait()

    def _stop_addr2line(self):
        """Stop the cached addr2line process, if any."""
        proc = self._addr2line
        self._addr2line = None
        self._addr2line_elf_name = None
        if proc is not None:
            self._reap(proc)

    def get_elf(self, elf_or_uuid):
        """Return the path of the ELF file named elf_or_uuid, or None.

        '.elf' is appended to the name when missing. Each entry of the
        search list is either used directly (when it is a file whose path
        ends with the name) or treated as a directory containing the file.
        """
        if not elf_or_uuid.endswith('.elf'):
            elf_or_uuid += '.elf'
        for d in self._dirs:
            if d.endswith(elf_or_uuid) and os.path.isfile(d):
                return d
            elf = glob.glob(d + '/' + elf_or_uuid)
            if elf:
                return elf[0]
        return None

    def set_arch(self):
        """Determine the binutils prefix, unless it is already known.

        The CROSS_COMPILE environment variable is used when set to a
        non-empty value. Otherwise the 'file' command is run on the ELF
        file with index 0 and the prefix is derived from its output.
        """
        if self._arch:
            return
        self._arch = os.getenv('CROSS_COMPILE')
        if self._arch:
            return
        elf = self.get_elf(self._elfs[0][0])
        if elf is None:
            return
        p = subprocess.Popen(['file', elf], stdout=subprocess.PIPE)
        output, _ = p.communicate()
        # Only the first line matters; this is also safe on empty output
        first_line = output.split(b'\n', 1)[0]
        if b'ARM aarch64,' in first_line:
            self._arch = 'aarch64-linux-gnu-'
        elif b'ARM,' in first_line:
            self._arch = 'arm-linux-gnueabihf-'

    def arch_prefix(self, cmd):
        """Return cmd with the binutils prefix, or '' if it is unknown."""
        self.set_arch()
        if self._arch is None:
            return ''
        return self._arch + cmd

    def spawn_addr2line(self, elf_name):
        """Make sure an addr2line process is running for elf_name.

        A live process already serving the same ELF name is reused. Any
        other process is stopped first. self._addr2line is left as None
        when the ELF file or the tool prefix cannot be found.
        """
        if elf_name is None:
            return
        if (self._addr2line is not None and
                self._addr2line_elf_name == elf_name and
                self._addr2line.poll() is None):
            return
        self._stop_addr2line()
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('addr2line')
        if not cmd:
            return
        self._addr2line = self.my_Popen([cmd, '-f', '-p', '-e', elf])
        # Remember which ELF is being served so the process can be reused
        self._addr2line_elf_name = elf_name

    # If addr falls into a region that maps a TA ELF file, return the load
    # address of that file.
    def elf_load_addr(self, addr):
        """Return the load address (hex string) of the ELF containing addr.

        The TEE core load address is returned when no region table is
        known, when addr is outside of every ELF-backed region, or when the
        region refers to an ELF index that is not in the ELF list.
        """
        if not self._regions:
            # tee.elf
            return self._tee_load_addr
        i_addr = int(addr, 16)
        for r in self._regions:
            r_addr = int(r[0], 16)
            r_size = int(r[1], 16)
            if r_addr <= i_addr < r_addr + r_size:
                # Found region
                elf_idx = r[2]
                if elf_idx is not None:
                    entry = self._elfs.get(int(elf_idx))
                    if entry is not None:
                        return entry[1]
        # In case address is not found in TA ELF file, fallback to tee.elf
        # especially to symbolize mixed (user-space and kernel) addresses
        # which is true when syscall ftrace is enabled along with TA
        # ftrace.
        return self._tee_load_addr

    def elf_for_addr(self, addr):
        """Return the name of the ELF containing addr, or None."""
        l_addr = self.elf_load_addr(addr)
        if l_addr == self._tee_load_addr:
            return 'tee.elf'
        wanted = int(l_addr, 16)
        for e in self._elfs.values():
            if int(e[1], 16) == wanted:
                return e[0]
        return None

    def subtract_load_addr(self, addr):
        """Return addr relative to the load address of its ELF.

        Returns '' when the load address is above addr and None when no
        load address is available.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if int(l_addr, 16) > int(addr, 16):
            return ''
        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))

    def resolve(self, addr):
        """Return the addr2line description of addr.

        Returns '???' when the address cannot be resolved (no ELF file, no
        tool prefix, no section information for tee.elf) and '!!!' when
        talking to addr2line fails.
        """
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        self.spawn_addr2line(elf_name)
        if not reladdr or not self._addr2line:
            return '???'
        if elf_name == 'tee.elf':
            # The offset is relative to the start of the first section
            base = self.first_vma('tee.elf')
            if base is None:
                return '???'
            reladdr = '0x{:x}'.format(int(reladdr, 16) + int(base, 16))
        try:
            print(reladdr, file=self._addr2line.stdin)
            ret = self._addr2line.stdout.readline().rstrip('\n')
        except IOError:
            ret = '!!!'
        return ret

    @staticmethod
    def _symbol_from_nm(lines, target):
        """Find target in the output of 'nm --numeric-sort --print-size'.

        lines  -- iterable of output lines, sorted by address
        target -- address to look up (int)

        Returns 'name' on an exact match, 'name+offset' (decimal offset)
        when target is inside a symbol, one byte past the end of a symbol,
        or follows a symbol that has no size; '' otherwise. A symbol that
        starts exactly at target takes precedence over one that ends there.
        """
        prev = None     # (name, addr, size) of the last symbol below target
        past_end = ''   # first symbol ending exactly at target
        for line in lines:
            fields = line.split()
            try:
                if len(fields) == 4:
                    sym_addr = int(fields[0], 16)
                    sym_size = int(fields[1], 16)
                    name = fields[3]
                elif len(fields) == 3:
                    # Size is missing
                    sym_addr = int(fields[0], 16)
                    sym_size = 0
                    name = fields[2]
                else:
                    # E.g., undefined (external) symbols (line = "U symbol")
                    continue
            except ValueError:
                continue
            if sym_addr == target:
                return name
            if sym_addr > target:
                # Input is sorted: no later symbol can contain target. Fall
                # back to the preceding symbol if its extent is unknown.
                if not past_end and prev is not None and prev[2] == 0:
                    return prev[0] + '+' + str(target - prev[1])
                break
            offs = target - sym_addr
            if offs < sym_size:
                return name + '+' + str(offs)
            if offs == sym_size and not past_end:
                past_end = name + '+' + str(offs)
            prev = (name, sym_addr, sym_size)
        return past_end

    def symbol_plus_offset(self, addr):
        """Return 'symbol[+offset]' for addr using nm, or '' if unknown."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('nm')
        if not reladdr or not elf or not cmd:
            return ''
        nm = self.my_Popen([cmd, '--numeric-sort', '--print-size', elf])
        try:
            return self._symbol_from_nm(iter(nm.stdout.readline, ''),
                                        int(reladdr, 16))
        finally:
            self._reap(nm)

    @staticmethod
    def _section_from_objdump(lines, target):
        """Find target in the output of 'objdump --section-headers'.

        Returns 'section' on an exact match with a section start,
        'section+offset' (decimal offset) when target is inside a section
        or one byte past its end, and '' otherwise. A section that starts
        exactly at target takes precedence over one that ends there.
        """
        past_end = ''   # first section ending exactly at target
        for line in lines:
            fields = line.split()
            if len(fields) != 7:
                continue
            try:
                size = int(fields[2], 16)
                vma = int(fields[3], 16)
            except ValueError:
                # Not a section header line
                continue
            name = fields[1]
            if vma == target:
                return name
            if vma < target:
                offs = target - vma
                if offs < size:
                    return name + '+' + str(offs)
                if offs == size and not past_end:
                    past_end = name + '+' + str(offs)
        return past_end

    def section_plus_offset(self, addr):
        """Return 'section[+offset]' for addr using objdump, or ''."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not reladdr or not elf or not cmd:
            return ''
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            return self._section_from_objdump(
                iter(objdump.stdout.readline, ''), int(reladdr, 16))
        finally:
            self._reap(objdump)

    def process_abort(self, line):
        """Return the abort line annotated with symbol and section.

        line must match ABORT_ADDR_RE. Returns '' when neither the symbol
        nor the section of the abort address can be determined.
        """
        ret = ''
        match = re.search(ABORT_ADDR_RE, line)
        addr = match.group('addr')
        pre = match.start('addr')
        post = match.end('addr')
        sym = self.symbol_plus_offset(addr)
        sec = self.section_plus_offset(addr)
        if sym or sec:
            ret += line[:pre]
            ret += addr
            if sym:
                ret += ' ' + sym
            if sec:
                ret += ' ' + sec
            ret += line[post:]
        return ret

    @staticmethod
    def _alloc_sections(lines):
        """Extract ALLOC sections from 'objdump --section-headers' output.

        Each section is described by a seven-field header line followed by
        a line listing its flags. Returns [[name, vma, size], ...] for the
        sections whose flags line contains 'ALLOC', in output order.
        """
        sections = []
        header = None   # [name, vma, size] waiting for its flags line
        for line in lines:
            fields = line.split()
            if len(fields) == 7:
                try:
                    header = [fields[1], int(fields[3], 16),
                              int(fields[2], 16)]
                    continue
                except ValueError:
                    # Seven tokens but not a header: treat as a flags line
                    pass
            if header is not None and 'ALLOC' in line:
                sections.append(header)
                header = None
        return sections

    # Return all ELF sections with the ALLOC flag
    def read_sections(self, elf_name):
        """Cache the ALLOC sections of elf_name in self._sections.

        Does nothing if the sections are already cached, or if the ELF
        file or the tool prefix cannot be found.
        """
        if elf_name is None:
            return
        if elf_name in self._sections:
            return
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not elf or not cmd:
            return
        self._sections[elf_name] = []
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            self._sections[elf_name] = self._alloc_sections(
                iter(objdump.stdout.readline, ''))
        finally:
            self._reap(objdump)

    def first_vma(self, elf_name):
        """Return the address of the first ALLOC section of elf_name.

        The result is a hex string, or None when no section information
        is available for this ELF file.
        """
        self.read_sections(elf_name)
        sections = self._sections.get(elf_name)
        if not sections:
            return None
        return '0x{:x}'.format(sections[0][1])

    def overlaps(self, section, addr, size):
        """Tell if [addr, addr + size) intersects section.

        section is a [name, addr, size] list. Empty ranges never overlap.
        """
        sec_addr = section[1]
        sec_size = section[2]
        if not size or not sec_size:
            return False
        return ((addr <= (sec_addr + sec_size - 1)) and
                ((addr + size - 1) >= sec_addr))

    def sections_in_region(self, addr, size, elf_idx):
        """Return the names of the ELF sections mapped by a region.

        addr and size are hex strings describing the region, elf_idx is the
        index of the ELF file in the ELF list. The result is a string in
        which each section name is preceded by a space; it is empty when
        nothing is known about the ELF file.
        """
        addr = self.subtract_load_addr(addr)
        if not addr:
            return ''
        iaddr = int(addr, 16)
        isize = int(size, 16)
        entry = self._elfs.get(int(elf_idx))
        if entry is None or entry[0] is None:
            return ''
        elf = entry[0]
        self.read_sections(elf)
        if elf not in self._sections:
            return ''
        return ''.join(' ' + s[0] for s in self._sections[elf]
                       if self.overlaps(s, iaddr, isize))

    def reset(self):
        """Forget everything learnt from the dump processed so far."""
        self._call_stack_found = False
        self._stop_addr2line()
        self._arch = None
        self._saved_abort_line = ''
        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
        # Load addresses are hex strings, like the ones read from the dump
        self._elfs = {0: ["tee.elf", '0x0']}  # {idx: [uuid, load_addr], ...}
        self._tee_load_addr = '0x0'
        self._func_graph_found = False
        self._func_graph_skip_line = True

    def pretty_print_path(self, path):
        """Remove the strip path (and following slashes) from path."""
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path

    def _dump_saved_tables(self):
        """Print the saved region table and ELF list, with annotations."""
        for r in self._regions:
            elf_idx = r[2]
            saved_line = r[3]
            if elf_idx is None:
                self._out.write(saved_line)
            else:
                self._out.write(saved_line.strip() +
                                self.sections_in_region(r[0], r[1],
                                                        elf_idx) +
                                '\n')
        for e in self._elfs.values():
            if len(e) >= 3:
                # TA executable or library
                self._out.write(e[2].strip())
                elf = self.get_elf(e[0])
                if elf:
                    rpath = os.path.realpath(elf)
                    path = self.pretty_print_path(rpath)
                    self._out.write(' (' + path + ')')
                self._out.write('\n')

    def write(self, line):
        """Process one input line and write the result to the output."""
        if self._call_stack_found:
            match = re.search(STACK_ADDR_RE, line)
            if match:
                addr = match.group('addr')
                self._out.write(line[:match.start('addr')])
                self._out.write(addr)
                res = self.pretty_print_path(self.resolve(addr))
                self._out.write(' ' + res)
                self._out.write(line[match.end('addr'):])
                return
            # End of the call stack: start afresh with the current line
            self.reset()
        if self._func_graph_found:
            match = re.search(GRAPH_ADDR_RE, line)
            if match:
                # Replace the address with the function name only
                self._out.write(line[:match.start('addr')])
                res = self.resolve(match.group('addr'))
                self._out.write(res.split(' ')[0])
                self._out.write(line[match.end('addr'):])
                self._func_graph_skip_line = False
                return
            if re.search(GRAPH_RE, line):
                self._out.write(line)
                return
            if self._func_graph_skip_line:
                return
            self.reset()
        match = re.search(REGION_RE, line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            self._regions.append([match.group('addr'), match.group('size'),
                                  match.group('elf_idx'), line])
            return
        match = re.search(ELF_LIST_RE, line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            i = int(match.group('idx'))
            self._elfs[i] = [match.group('uuid'), match.group('load_addr'),
                             line]
            return
        match = re.search(TEE_LOAD_ADDR_RE, line)
        if match:
            self._tee_load_addr = match.group('load_addr')
        if re.search(CALL_STACK_RE, line):
            self._call_stack_found = True
            self._dump_saved_tables()
            # Here is a good place to resolve the abort address because we
            # have all the information we need
            if self._saved_abort_line:
                self._out.write(self.process_abort(self._saved_abort_line))
        if re.search(FUNC_GRAPH_RE, line):
            self._func_graph_found = True
        if re.search(ABORT_ADDR_RE, line):
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so We can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)

    def flush(self):
        """Flush the output stream."""
        self._out.flush()

484
485
def main():
    """Symbolize everything read from stdin and print it to stdout.

    When stdin is a terminal, echo is turned off while reading so that
    pasted dumps do not show up twice; the original terminal settings are
    restored on exit.
    """
    args = get_args()
    # Flatten list in case -d is used several times *and* with multiple
    # arguments
    args.dirs = []
    for group in args.dir or []:
        args.dirs.extend(group)
    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)

    fd = sys.stdin.fileno()
    interactive = os.isatty(fd)
    if interactive:
        saved = termios.tcgetattr(fd)
        quiet = termios.tcgetattr(fd)
        quiet[3] &= ~termios.ECHO  # lflags
    try:
        if interactive:
            termios.tcsetattr(fd, termios.TCSADRAIN, quiet)
        for line in sys.stdin:
            symbolizer.write(line)
    finally:
        symbolizer.flush()
        if interactive:
            termios.tcsetattr(fd, termios.TCSADRAIN, saved)


if __name__ == "__main__":
    main()
515