xref: /optee_os/scripts/symbolize.py (revision 105e09c24479903c90e2f086fdefd540950fc728)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import errno
10import glob
11import os
12import re
13import subprocess
14import sys
15import termios
16
# Patterns used by Symbolizer.write() to recognize the interesting lines of
# an OP-TEE dump. Hexadecimal digits are expected in lower case.

# Header line announcing that call stack entries follow
CALL_STACK_RE = re.compile('Call stack:')
# Captures the address printed after 'Load address @ ' as 'load_addr'
TEE_LOAD_ADDR_RE = re.compile(r'Load address @ (?P<load_addr>0x[0-9a-f]+)')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(
    r'[UEIDFM]/(TC|LD):(\?*|[0-9]*) [0-9]* +(?P<addr>0x[0-9a-f]+)')
# Captures the faulting address of '...-abort at address 0x...' lines
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One entry of the region table: virtual address ('addr'), 'size' and,
# when present after the flags, the index of the mapped ELF ('elf_idx')
REGION_RE = re.compile(r'region +[0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{4} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One entry of the ELF list: '[idx] uuid @ load_addr'
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f\-]+)')
# Header line announcing a function graph
FUNC_GRAPH_RE = re.compile(r'Function graph')
# Any hexadecimal address found in a function graph line
GRAPH_ADDR_RE = re.compile(r'(?P<addr>0x[0-9a-f]+)')
# Closing brace of a function graph block
GRAPH_RE = re.compile(r'}')
32
# Extended help text shown after the option list by argparse (see get_args())
epilog = '''
This script reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info. If the CROSS_COMPILE environment
variable is set, it is used as a prefix to the binutils tools. That is, the
script will invoke $(CROSS_COMPILE)addr2line etc. If it is not set however,
the prefix will be determined automatically for each ELF file based on its
architecture (arm-linux-gnueabihf-, aarch64-linux-gnu-). The resulting command
is then expected to be found in the user's PATH.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D

Also, this script reads function graph generated for OP-TEE user TA from
/tmp/ftrace-<ta_uuid>.out file and resolves function addresses to corresponding
symbols.

Sample usage:

  $ cat /tmp/ftrace-<ta_uuid>.out | scripts/symbolize.py -d <ta_uuid>.elf
  <paste function graph here>
  ^D
'''
77
78
def get_args():
    """Parse the command line and return the resulting namespace.

    The namespace carries 'dir' (a list of lists, one per -d occurrence, or
    None when -d was not given) and 'strip_path' (the current directory by
    default, None when -s is given without a value).
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed '
                'to decode a TEE Core or pseudo-TA abort, while '
                '<TA_uuid>.elf is required if a user-mode TA has '
                'crashed. For convenience, ELF files may also be '
                'given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: '
                  'current directory, use -s with no argument to show '
                  'full paths)')

    parser = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps or function graphs',
        epilog=epilog,
        # Keep the line breaks of the epilog as they are
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-d', '--dir', nargs='+', action='append',
                        help=dir_help)
    parser.add_argument('-s', '--strip_path', nargs='?',
                        default=os.getcwd(), help=strip_help)

    return parser.parse_args()
96
97
98class Symbolizer(object):
99    def __init__(self, out, dirs, strip_path):
100        self._out = out
101        self._dirs = dirs
102        self._strip_path = strip_path
103        self._addr2line = None
104        self.reset()
105
106    def my_Popen(self, cmd):
107        try:
108            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
109                                    stdout=subprocess.PIPE, text=True,
110                                    bufsize=1)
111        except OSError as e:
112            if e.errno == errno.ENOENT:
113                print("*** Error:{}: command not found".format(cmd[0]),
114                      file=sys.stderr)
115                sys.exit(1)
116
117    def get_elf(self, elf_or_uuid):
118        if not elf_or_uuid.endswith('.elf'):
119            elf_or_uuid += '.elf'
120        for d in self._dirs:
121            if d.endswith(elf_or_uuid) and os.path.isfile(d):
122                return d
123            elf = glob.glob(d + '/' + elf_or_uuid)
124            if elf:
125                return elf[0]
126
127    def set_arch(self):
128        if self._arch:
129            return
130        self._arch = os.getenv('CROSS_COMPILE')
131        if self._arch:
132            return
133        elf = self.get_elf(self._elfs[0][0])
134        if elf is None:
135            return
136        p = subprocess.Popen(['file', self.get_elf(self._elfs[0][0])],
137                             stdout=subprocess.PIPE)
138        output = p.stdout.readlines()
139        p.terminate()
140        if b'ARM aarch64,' in output[0]:
141            self._arch = 'aarch64-linux-gnu-'
142        elif b'ARM,' in output[0]:
143            self._arch = 'arm-linux-gnueabihf-'
144
145    def arch_prefix(self, cmd):
146        self.set_arch()
147        if self._arch is None:
148            return ''
149        return self._arch + cmd
150
151    def spawn_addr2line(self, elf_name):
152        if elf_name is None:
153            return
154        if self._addr2line_elf_name is elf_name:
155            return
156        if self._addr2line:
157            self._addr2line.terminate
158            self._addr2line = None
159        elf = self.get_elf(elf_name)
160        if not elf:
161            return
162        cmd = self.arch_prefix('addr2line')
163        if not cmd:
164            return
165        args = [cmd]
166        if elf_name == 'tee.elf' and self._tee_load_addr != '0x0':
167            args += ['-j.text']
168        args += ['-f', '-p', '-e', elf]
169        self._addr2line = self.my_Popen(args)
170        self._addr2line_elf_name = elf_name
171
172    # If addr falls into a region that maps a TA ELF file, return the load
173    # address of that file.
174    def elf_load_addr(self, addr):
175        if self._regions:
176            for r in self._regions:
177                r_addr = int(r[0], 16)
178                r_size = int(r[1], 16)
179                i_addr = int(addr, 16)
180                if (i_addr >= r_addr and i_addr < (r_addr + r_size)):
181                    # Found region
182                    elf_idx = r[2]
183                    if elf_idx is not None:
184                        return self._elfs[int(elf_idx)][1]
185            # In case address is not found in TA ELF file, fallback to tee.elf
186            # especially to symbolize mixed (user-space and kernel) addresses
187            # which is true when syscall ftrace is enabled along with TA
188            # ftrace.
189            return '0x0'
190        else:
191            # tee.elf
192            return self._tee_load_addr
193
194    def elf_for_addr(self, addr):
195        if not self._regions:
196            return 'tee.elf'
197        l_addr = self.elf_load_addr(addr)
198        if l_addr is None:
199            return None
200        for k in self._elfs:
201            e = self._elfs[k]
202            if int(e[1], 16) == int(l_addr, 16):
203                return e[0]
204        return None
205
206    def subtract_load_addr(self, addr):
207        l_addr = self.elf_load_addr(addr)
208        if l_addr is None:
209            return None
210        if int(l_addr, 16) > int(addr, 16):
211            return ''
212        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))
213
214    def resolve(self, addr):
215        reladdr = self.subtract_load_addr(addr)
216        self.spawn_addr2line(self.elf_for_addr(addr))
217        if not reladdr or not self._addr2line:
218            return '???'
219        try:
220            print(reladdr, file=self._addr2line.stdin)
221            ret = self._addr2line.stdout.readline().rstrip('\n')
222        except IOError:
223            ret = '!!!'
224        return ret
225
226    def symbol_plus_offset(self, addr):
227        ret = ''
228        prevsize = 0
229        reladdr = self.subtract_load_addr(addr)
230        elf_name = self.elf_for_addr(addr)
231        if elf_name is None:
232            return ''
233        elf = self.get_elf(elf_name)
234        cmd = self.arch_prefix('nm')
235        if not reladdr or not elf or not cmd:
236            return ''
237        ireladdr = int(reladdr, 16)
238        nm = self.my_Popen([cmd, '--numeric-sort', '--print-size', elf])
239        for line in iter(nm.stdout.readline, ''):
240            try:
241                addr, size, _, name = line.split()
242            except ValueError:
243                # Size is missing
244                try:
245                    addr, _, name = line.split()
246                    size = '0'
247                except ValueError:
248                    # E.g., undefined (external) symbols (line = "U symbol")
249                    continue
250            iaddr = int(addr, 16)
251            isize = int(size, 16)
252            if iaddr == ireladdr:
253                ret = name
254                break
255            if iaddr < ireladdr and iaddr + isize >= ireladdr:
256                offs = ireladdr - iaddr
257                ret = name + '+' + str(offs)
258                break
259            if iaddr > ireladdr and prevsize == 0:
260                offs = iaddr + ireladdr
261                ret = prevname + '+' + str(offs)
262                break
263            prevsize = size
264            prevname = name
265        nm.terminate()
266        return ret
267
268    def section_plus_offset(self, addr):
269        ret = ''
270        reladdr = self.subtract_load_addr(addr)
271        elf_name = self.elf_for_addr(addr)
272        if elf_name is None:
273            return ''
274        elf = self.get_elf(elf_name)
275        cmd = self.arch_prefix('objdump')
276        if not reladdr or not elf or not cmd:
277            return ''
278        iaddr = int(reladdr, 16)
279        objdump = self.my_Popen([cmd, '--section-headers', elf])
280        for line in iter(objdump.stdout.readline, ''):
281            try:
282                idx, name, size, vma, lma, offs, algn = line.split()
283            except ValueError:
284                continue
285            ivma = int(vma, 16)
286            isize = int(size, 16)
287            if ivma == iaddr:
288                ret = name
289                break
290            if ivma < iaddr and ivma + isize >= iaddr:
291                offs = iaddr - ivma
292                ret = name + '+' + str(offs)
293                break
294        objdump.terminate()
295        return ret
296
297    def process_abort(self, line):
298        ret = ''
299        match = re.search(ABORT_ADDR_RE, line)
300        addr = match.group('addr')
301        pre = match.start('addr')
302        post = match.end('addr')
303        sym = self.symbol_plus_offset(addr)
304        sec = self.section_plus_offset(addr)
305        if sym or sec:
306            ret += line[:pre]
307            ret += addr
308            if sym:
309                ret += ' ' + sym
310            if sec:
311                ret += ' ' + sec
312            ret += line[post:]
313        return ret
314
315    # Return all ELF sections with the ALLOC flag
316    def read_sections(self, elf_name):
317        if elf_name is None:
318            return
319        if elf_name in self._sections:
320            return
321        elf = self.get_elf(elf_name)
322        cmd = self.arch_prefix('objdump')
323        if not elf or not cmd:
324            return
325        self._sections[elf_name] = []
326        objdump = self.my_Popen([cmd, '--section-headers', elf])
327        for line in iter(objdump.stdout.readline, ''):
328            try:
329                _, name, size, vma, _, _, _ = line.split()
330            except ValueError:
331                if 'ALLOC' in line:
332                    self._sections[elf_name].append([name, int(vma, 16),
333                                                     int(size, 16)])
334
335    def overlaps(self, section, addr, size):
336        sec_addr = section[1]
337        sec_size = section[2]
338        if not size or not sec_size:
339            return False
340        return ((addr <= (sec_addr + sec_size - 1)) and
341                ((addr + size - 1) >= sec_addr))
342
343    def sections_in_region(self, addr, size, elf_idx):
344        ret = ''
345        addr = self.subtract_load_addr(addr)
346        if not addr:
347            return ''
348        iaddr = int(addr, 16)
349        isize = int(size, 16)
350        elf = self._elfs[int(elf_idx)][0]
351        if elf is None:
352            return ''
353        self.read_sections(elf)
354        if elf not in self._sections:
355            return ''
356        for s in self._sections[elf]:
357            if self.overlaps(s, iaddr, isize):
358                ret += ' ' + s[0]
359        return ret
360
361    def reset(self):
362        self._call_stack_found = False
363        if self._addr2line:
364            self._addr2line.terminate()
365            self._addr2line = None
366        self._addr2line_elf_name = None
367        self._arch = None
368        self._saved_abort_line = ''
369        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
370        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
371        self._elfs = {0: ["tee.elf", 0]}  # {idx: [uuid, load_addr], ...}
372        self._tee_load_addr = 0x0
373        self._func_graph_found = False
374        self._func_graph_skip_line = True
375
376    def pretty_print_path(self, path):
377        if self._strip_path:
378            return re.sub(re.escape(self._strip_path) + '/*', '', path)
379        return path
380
    def write(self, line):
        """Process one line of input and write the result to self._out.

        Call stack entries and function graph addresses are written with the
        resolved symbol information. Region table and ELF list lines are not
        written immediately: they are saved and emitted, with extra details,
        when the 'Call stack:' line is reached. Any other line is written
        unchanged.
        """
        if self._call_stack_found:
            match = re.search(STACK_ADDR_RE, line)
            if match:
                # Call stack entry: insert the addr2line result right after
                # the address
                addr = match.group('addr')
                pre = match.start('addr')
                post = match.end('addr')
                self._out.write(line[:pre])
                self._out.write(addr)
                res = self.resolve(addr)
                res = self.pretty_print_path(res)
                self._out.write(' ' + res)
                self._out.write(line[post:])
                return
            else:
                # End of the call stack: start over, then keep processing
                # this line below
                self.reset()
        if self._func_graph_found:
            match = re.search(GRAPH_ADDR_RE, line)
            match_re = re.search(GRAPH_RE, line)
            if match:
                # Replace the address by the first word of the addr2line
                # result
                addr = match.group('addr')
                pre = match.start('addr')
                post = match.end('addr')
                self._out.write(line[:pre])
                res = self.resolve(addr)
                res_arr = re.split(' ', res)
                self._out.write(res_arr[0])
                self._out.write(line[post:])
                self._func_graph_skip_line = False
                return
            elif match_re:
                self._out.write(line)
                return
            elif self._func_graph_skip_line:
                # Lines seen before the first address of the graph are
                # dropped
                return
            else:
                self.reset()
        match = re.search(REGION_RE, line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            addr = match.group('addr')
            size = match.group('size')
            elf_idx = match.group('elf_idx')
            self._regions.append([addr, size, elf_idx, line])
            return
        match = re.search(ELF_LIST_RE, line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            i = int(match.group('idx'))
            self._elfs[i] = [match.group('uuid'), match.group('load_addr'),
                             line]
            return
        match = re.search(TEE_LOAD_ADDR_RE, line)
        if match:
            self._tee_load_addr = match.group('load_addr')
        match = re.search(CALL_STACK_RE, line)
        if match:
            self._call_stack_found = True
            if self._regions:
                # Emit the saved region table, adding the names of the ELF
                # sections found in each region that maps an ELF
                for r in self._regions:
                    r_addr = r[0]
                    r_size = r[1]
                    elf_idx = r[2]
                    saved_line = r[3]
                    if elf_idx is None:
                        self._out.write(saved_line)
                    else:
                        self._out.write(saved_line.strip() +
                                        self.sections_in_region(r_addr,
                                                                r_size,
                                                                elf_idx) +
                                        '\n')
            if self._elfs:
                # Emit the saved ELF list, adding the path of each ELF file
                # that could be located
                for k in self._elfs:
                    e = self._elfs[k]
                    if (len(e) >= 3):
                        # TA executable or library
                        self._out.write(e[2].strip())
                        elf = self.get_elf(e[0])
                        if elf:
                            rpath = os.path.realpath(elf)
                            path = self.pretty_print_path(rpath)
                            self._out.write(' (' + path + ')')
                        self._out.write('\n')
            # Here is a good place to resolve the abort address because we
            # have all the information we need
            if self._saved_abort_line:
                self._out.write(self.process_abort(self._saved_abort_line))
        match = re.search(FUNC_GRAPH_RE, line)
        if match:
            self._func_graph_found = True
        match = re.search(ABORT_ADDR_RE, line)
        if match:
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so we can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)
481
482    def flush(self):
483        self._out.flush()
484
485
def main():
    """Symbolize the dump read from stdin and write it to stdout.

    When stdin is a terminal, echo is switched off while reading and the
    original terminal settings are restored at the end.
    """
    args = get_args()
    # Flatten list in case -d is used several times *and* with multiple
    # arguments
    flat_dirs = []
    for group in args.dir or []:
        flat_dirs.extend(group)
    args.dirs = flat_dirs
    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)

    fd = sys.stdin.fileno()
    isatty = os.isatty(fd)
    if isatty:
        old = termios.tcgetattr(fd)
        new = termios.tcgetattr(fd)
        new[3] &= ~termios.ECHO  # lflags
    try:
        if isatty:
            termios.tcsetattr(fd, termios.TCSADRAIN, new)
        for line in sys.stdin:
            symbolizer.write(line)
    finally:
        symbolizer.flush()
        if isatty:
            termios.tcsetattr(fd, termios.TCSADRAIN, old)


if __name__ == "__main__":
    main()
515