xref: /optee_os/scripts/symbolize.py (revision 099918f6744c37ce693c38562f11466b19d573c9)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import errno
10import glob
11import os
12import re
13import subprocess
14import sys
15import termios
16
# Patterns used by Symbolizer.write() to classify each input line.

# Header announcing that the following lines are call stack addresses.
CALL_STACK_RE = re.compile('Call stack:')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(
    r'[UEIDFM]/(TC|LD):(\?*|[0-9]*) [0-9]* +(?P<addr>0x[0-9a-f]+)')
# Faulting address of an abort message, e.g.:
# "... data-abort at address 0xffffdecd ..."
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One entry of the region table: "region N: va 0x... pa 0x... size 0x...",
# optionally followed by " flags xxxx" and an ELF index between brackets.
REGION_RE = re.compile(r'region +[0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{4} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One entry of the ELF list: "[idx] <uuid> @ <load address>".
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f\-]+)')
# Header announcing that the following lines are a function graph.
FUNC_GRAPH_RE = re.compile(r'Function graph')
# Any hexadecimal address found in a function graph line.
GRAPH_ADDR_RE = re.compile(r'(?P<addr>0x[0-9a-f]+)')
# Closing brace of a function graph scope; such lines are copied verbatim.
GRAPH_RE = re.compile(r'}')
31
# Long description passed as the argparse epilog in get_args(); it is
# printed verbatim after the option list in the --help output.
epilog = '''
This scripts reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info. If the CROSS_COMPILE environment
variable is set, it is used as a prefix to the binutils tools. That is, the
script will invoke $(CROSS_COMPILE)addr2line etc. If it is not set however,
the prefix will be determined automatically for each ELF file based on its
architecture (arm-linux-gnueabihf-, aarch64-linux-gnu-). The resulting command
is then expected to be found in the user's PATH.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D

Also, this script reads function graph generated for OP-TEE user TA from
/tmp/ftrace-<ta_uuid>.out file and resolves function addresses to corresponding
symbols.

Sample usage:

  $ cat /tmp/ftrace-<ta_uuid>.out | scripts/symbolize.py -d <ta_uuid>.elf
  <paste function graph here>
  ^D
'''
76
77
def get_args():
    """Build the command line parser and parse sys.argv.

    Returns the argparse namespace with two attributes:
      dir         list of lists of paths (one inner list per -d option), or
                  None when -d was not given
      strip_path  prefix to remove from reported file paths; defaults to the
                  current directory, None when -s is given without a value
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed to decode a '
                'TEE Core or pseudo-TA abort, while <TA_uuid>.elf is '
                'required if a user-mode TA has crashed. For convenience, '
                'ELF files may also be given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: current '
                  'directory, use -s with no argument to show full paths)')

    cli = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps or function graphs',
        epilog=epilog,
        # Keep the epilog layout (examples, blank lines) as written
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument('-d', '--dir', nargs='+', action='append',
                     help=dir_help)
    cli.add_argument('-s', '--strip_path', nargs='?', default=os.getcwd(),
                     help=strip_help)
    parsed = cli.parse_args()
    return parsed
95
96
class Symbolizer(object):
    """File-like filter that adds debug information to OP-TEE dumps.

    Lines are fed one at a time to write(). Region table and ELF list lines
    are recorded, call stack and function graph addresses are resolved with
    addr2line, and the abort address is described with nm and objdump. The
    annotated text is written to the 'out' object given to the constructor.

    Arguments:
      out         object with write() and flush() receiving the output
      dirs        list of directories (or ELF file paths) to search
      strip_path  prefix removed from resolved source paths, or None/'' to
                  keep full paths
    """

    def __init__(self, out, dirs, strip_path):
        self._out = out
        self._dirs = dirs
        self._strip_path = strip_path
        # Must exist before reset() which stops any running addr2line
        self._addr2line = None
        self.reset()

    def my_Popen(self, cmd):
        """Start cmd with line-buffered text pipes on stdin and stdout.

        Exits the script with an error message when the command does not
        exist. Any other OSError is propagated to the caller rather than
        being turned into a None return value.
        """
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE, text=True,
                                    bufsize=1)
        except OSError as e:
            if e.errno == errno.ENOENT:
                print("*** Error:{}: command not found".format(cmd[0]),
                      file=sys.stderr)
                sys.exit(1)
            raise

    @staticmethod
    def _reap(proc):
        """Terminate a helper process, close its pipes and wait for it."""
        if proc is None:
            return
        proc.terminate()
        for stream in (proc.stdin, proc.stdout):
            if stream is None:
                continue
            try:
                stream.close()
            except OSError:
                # Closing stdin flushes it, which may fail if the process
                # is already gone. Nothing can be salvaged at this point.
                pass
        proc.wait()

    def _stop_addr2line(self):
        """Stop the running addr2line process, if any."""
        proc = self._addr2line
        self._addr2line = None
        self._addr2line_elf_name = None
        self._reap(proc)

    def get_elf(self, elf_or_uuid):
        """Return the path of the ELF file for a name or UUID, or None.

        '.elf' is appended when missing. Each entry of the search list may
        be the ELF file itself or a directory containing it.
        """
        if not elf_or_uuid.endswith('.elf'):
            elf_or_uuid += '.elf'
        for d in self._dirs:
            if d.endswith(elf_or_uuid) and os.path.isfile(d):
                return d
            elf = glob.glob(d + '/' + elf_or_uuid)
            if elf:
                return elf[0]
        return None

    def set_arch(self):
        """Determine the binutils prefix and cache it in self._arch.

        $CROSS_COMPILE is used when set. Otherwise the 'file' command is
        run on the first ELF of the list to tell 32-bit from 64-bit Arm.
        """
        if self._arch:
            return
        self._arch = os.getenv('CROSS_COMPILE')
        if self._arch:
            return
        elf = self.get_elf(self._elfs[0][0])
        if elf is None:
            return
        p = subprocess.Popen(['file', elf], stdout=subprocess.PIPE)
        # communicate() reads everything and waits for the process
        output = p.communicate()[0]
        first_line = output.split(b'\n', 1)[0]
        if b'ARM aarch64,' in first_line:
            self._arch = 'aarch64-linux-gnu-'
        elif b'ARM,' in first_line:
            self._arch = 'arm-linux-gnueabihf-'

    def arch_prefix(self, cmd):
        """Return cmd with the toolchain prefix, or '' if it is unknown."""
        self.set_arch()
        if self._arch is None:
            return ''
        return self._arch + cmd

    def spawn_addr2line(self, elf_name):
        """Make sure an addr2line process is running for elf_name.

        A process already serving the same ELF is reused. Otherwise the
        previous one is stopped and a new one is started, provided the ELF
        file and the toolchain prefix can be found.
        """
        if elf_name is None:
            return
        if self._addr2line_elf_name == elf_name:
            return
        self._stop_addr2line()
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('addr2line')
        if not cmd:
            return
        self._addr2line = self.my_Popen([cmd, '-f', '-p', '-e', elf])
        self._addr2line_elf_name = elf_name

    # If addr falls into a region that maps a TA ELF file, return the load
    # address of that file.
    def elf_load_addr(self, addr):
        """Return the load address (hex string) of the ELF mapping addr.

        '0x0' (tee.elf) is returned when no region with an ELF index
        contains the address. None is returned when the region refers to an
        ELF index which is absent from the ELF list.
        """
        i_addr = int(addr, 16)
        for r in self._regions:
            r_addr = int(r[0], 16)
            r_size = int(r[1], 16)
            if r_addr <= i_addr < r_addr + r_size:
                # Found region
                elf_idx = r[2]
                if elf_idx is not None:
                    e = self._elfs.get(int(elf_idx))
                    if e is None:
                        return None
                    return e[1]
        # In case address is not found in TA ELF file, fallback to tee.elf
        # especially to symbolize mixed (user-space and kernel) addresses
        # which is true when syscall ftrace is enabled along with TA
        # ftrace.
        return '0x0'

    def elf_for_addr(self, addr):
        """Return the ELF name (or UUID) that addr belongs to, or None."""
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        i_load = int(l_addr, 16)
        if i_load == 0:
            return 'tee.elf'
        for e in self._elfs.values():
            if int(e[1], 16) == i_load:
                return e[0]
        return None

    def subtract_load_addr(self, addr):
        """Return addr relative to its ELF load address as a hex string.

        Returns None when the ELF is unknown and '' when the load address
        is above addr.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if int(l_addr, 16) > int(addr, 16):
            return ''
        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))

    def resolve(self, addr):
        """Return the addr2line description of addr.

        '???' is returned when the address cannot be resolved and '!!!'
        when talking to addr2line fails.
        """
        reladdr = self.subtract_load_addr(addr)
        self.spawn_addr2line(self.elf_for_addr(addr))
        if not reladdr or not self._addr2line:
            return '???'
        try:
            # The pipe is line-buffered so the request is sent right away
            print(reladdr, file=self._addr2line.stdin)
            ret = self._addr2line.stdout.readline().rstrip('\n')
        except OSError:
            ret = '!!!'
        return ret

    @staticmethod
    def _symbol_for_offset(nm_lines, ireladdr):
        """Find the symbol covering ireladdr in address-sorted nm output.

        nm_lines is an iterable of 'nm --numeric-sort --print-size' lines.
        Returns 'name', 'name+<decimal offset>' or '' when nothing matches.
        """
        prev = None  # (addr, size, name) of the previous symbol
        for line in nm_lines:
            fields = line.split()
            if len(fields) == 4:
                addr, size, _, name = fields
            elif len(fields) == 3:
                # Size is missing
                addr, _, name = fields
                size = '0'
            else:
                # E.g., undefined (external) symbols (line = "U symbol")
                continue
            try:
                iaddr = int(addr, 16)
                isize = int(size, 16)
            except ValueError:
                continue
            if iaddr == ireladdr:
                return name
            # A symbol covers [iaddr, iaddr + isize): the end is exclusive
            if iaddr < ireladdr < iaddr + isize:
                return name + '+' + str(ireladdr - iaddr)
            if iaddr > ireladdr:
                # Symbols are sorted, so no later one can match. Attribute
                # the address to the previous symbol if its size is unknown.
                if prev is not None and prev[1] == 0:
                    return prev[2] + '+' + str(ireladdr - prev[0])
                return ''
            prev = (iaddr, isize, name)
        return ''

    def symbol_plus_offset(self, addr):
        """Return 'symbol' or 'symbol+offset' for addr, '' if unknown."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('nm')
        if not reladdr or not elf or not cmd:
            return ''
        nm = self.my_Popen([cmd, '--numeric-sort', '--print-size', elf])
        try:
            return self._symbol_for_offset(iter(nm.stdout.readline, ''),
                                           int(reladdr, 16))
        finally:
            self._reap(nm)

    @staticmethod
    def _section_for_offset(objdump_lines, iaddr):
        """Find the section covering iaddr in 'objdump -h' output.

        Returns 'name', 'name+<decimal offset>' or '' when nothing matches.
        """
        for line in objdump_lines:
            fields = line.split()
            # Idx Name Size VMA LMA File-off Algn
            if len(fields) != 7:
                continue
            name, size, vma = fields[1:4]
            try:
                ivma = int(vma, 16)
                isize = int(size, 16)
            except ValueError:
                # Not a section header (e.g., a line of section flags)
                continue
            if ivma == iaddr:
                return name
            # A section covers [ivma, ivma + isize): the end is exclusive
            if ivma < iaddr < ivma + isize:
                return name + '+' + str(iaddr - ivma)
        return ''

    def section_plus_offset(self, addr):
        """Return 'section' or 'section+offset' for addr, '' if unknown."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not reladdr or not elf or not cmd:
            return ''
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            return self._section_for_offset(
                iter(objdump.stdout.readline, ''), int(reladdr, 16))
        finally:
            self._reap(objdump)

    def process_abort(self, line):
        """Return the abort line annotated with symbol and section info.

        '' is returned when neither a symbol nor a section can be found for
        the abort address.
        """
        ret = ''
        match = re.search(ABORT_ADDR_RE, line)
        if not match:
            return ret
        addr = match.group('addr')
        pre = match.start('addr')
        post = match.end('addr')
        sym = self.symbol_plus_offset(addr)
        sec = self.section_plus_offset(addr)
        if sym or sec:
            ret += line[:pre]
            ret += addr
            if sym:
                ret += ' ' + sym
            if sec:
                ret += ' ' + sec
            ret += line[post:]
        return ret

    # Return all ELF sections with the ALLOC flag
    def read_sections(self, elf_name):
        """Cache the ALLOC sections of elf_name in self._sections.

        objdump prints each section on two lines: the header (index, name,
        size, VMA, ...) followed by the flags. A section is recorded when
        the flags line following its header contains ALLOC.
        """
        if elf_name is None:
            return
        if elf_name in self._sections:
            return
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not elf or not cmd:
            return
        sections = []
        self._sections[elf_name] = sections
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            header = None  # [name, vma, size] waiting for its flags line
            for line in iter(objdump.stdout.readline, ''):
                fields = line.split()
                if len(fields) == 7:
                    try:
                        header = [fields[1], int(fields[3], 16),
                                  int(fields[2], 16)]
                        continue
                    except ValueError:
                        # A flags line which happens to have 7 words
                        pass
                if header is not None and 'ALLOC' in line:
                    sections.append(header)
                    header = None
        finally:
            self._reap(objdump)

    def overlaps(self, section, addr, size):
        """Tell if [addr, addr + size) intersects section [name, addr, size].

        Empty ranges never overlap anything.
        """
        sec_addr = section[1]
        sec_size = section[2]
        if not size or not sec_size:
            return False
        return ((addr <= (sec_addr + sec_size - 1)) and
                ((addr + size - 1) >= sec_addr))

    def sections_in_region(self, addr, size, elf_idx):
        """Return the names of the ELF sections found in a region.

        Each name is preceded by a space. '' is returned when the ELF or
        its sections are not available.
        """
        ret = ''
        addr = self.subtract_load_addr(addr)
        if not addr:
            return ''
        iaddr = int(addr, 16)
        isize = int(size, 16)
        entry = self._elfs.get(int(elf_idx))
        if entry is None:
            return ''
        elf = entry[0]
        if elf is None:
            return ''
        self.read_sections(elf)
        if elf not in self._sections:
            return ''
        for s in self._sections[elf]:
            if self.overlaps(s, iaddr, isize):
                ret += ' ' + s[0]
        return ret

    def reset(self):
        """Forget everything learnt from the dump processed so far."""
        self._call_stack_found = False
        self._stop_addr2line()
        self._arch = None
        self._saved_abort_line = ''
        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
        # {idx: [uuid, load_addr], ...}. Load addresses are hex strings,
        # like the ones captured from the ELF list.
        self._elfs = {0: ["tee.elf", '0x0']}
        self._func_graph_found = False
        self._func_graph_skip_line = True

    def pretty_print_path(self, path):
        """Remove the strip path (and following slashes) from path."""
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path

    def write(self, line):
        """Process one line of input and write the annotated output.

        Region table and ELF list lines are held back until 'Call stack:'
        is seen, at which point they are written with section names and ELF
        paths added, followed by the annotated abort line. Call stack and
        function graph addresses are resolved as they come.
        """
        if self._call_stack_found:
            match = re.search(STACK_ADDR_RE, line)
            if match:
                addr = match.group('addr')
                pre = match.start('addr')
                post = match.end('addr')
                self._out.write(line[:pre])
                self._out.write(addr)
                res = self.resolve(addr)
                res = self.pretty_print_path(res)
                self._out.write(' ' + res)
                self._out.write(line[post:])
                return
            else:
                # End of the call stack
                self.reset()
        if self._func_graph_found:
            match = re.search(GRAPH_ADDR_RE, line)
            match_re = re.search(GRAPH_RE, line)
            if match:
                addr = match.group('addr')
                pre = match.start('addr')
                post = match.end('addr')
                self._out.write(line[:pre])
                res = self.resolve(addr)
                # Keep the function name only, not 'at <file>:<line>'
                res_arr = re.split(' ', res)
                self._out.write(res_arr[0])
                self._out.write(line[post:])
                self._func_graph_skip_line = False
                return
            elif match_re:
                self._out.write(line)
                return
            elif self._func_graph_skip_line:
                # Drop what comes before the first address of the graph
                return
            else:
                # End of the function graph
                self.reset()
        match = re.search(REGION_RE, line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            addr = match.group('addr')
            size = match.group('size')
            elf_idx = match.group('elf_idx')
            self._regions.append([addr, size, elf_idx, line])
            return
        match = re.search(ELF_LIST_RE, line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            i = int(match.group('idx'))
            self._elfs[i] = [match.group('uuid'), match.group('load_addr'),
                             line]
            return
        match = re.search(CALL_STACK_RE, line)
        if match:
            self._call_stack_found = True
            for r in self._regions:
                r_addr = r[0]
                r_size = r[1]
                elf_idx = r[2]
                saved_line = r[3]
                if elf_idx is None:
                    self._out.write(saved_line)
                else:
                    self._out.write(saved_line.strip() +
                                    self.sections_in_region(r_addr,
                                                            r_size,
                                                            elf_idx) +
                                    '\n')
            for e in self._elfs.values():
                if len(e) >= 3:
                    # TA executable or library
                    self._out.write(e[2].strip())
                    elf = self.get_elf(e[0])
                    if elf:
                        rpath = os.path.realpath(elf)
                        path = self.pretty_print_path(rpath)
                        self._out.write(' (' + path + ')')
                    self._out.write('\n')
            # Here is a good place to resolve the abort address because we
            # have all the information we need
            if self._saved_abort_line:
                self._out.write(self.process_abort(self._saved_abort_line))
        match = re.search(FUNC_GRAPH_RE, line)
        if match:
            self._func_graph_found = True
        match = re.search(ABORT_ADDR_RE, line)
        if match:
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so We can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)

    def flush(self):
        """Flush the output object."""
        self._out.flush()
475
476
def main():
    """Symbolize stdin to stdout, hiding typed input on a terminal.

    When stdin is a terminal, echo is switched off while the dump is read
    so that pasted text does not show up twice, and the previous terminal
    settings are restored on exit.
    """
    args = get_args()
    # -d may be given several times, each time with several values
    search_paths = []
    if args.dir:
        for group in args.dir:
            search_paths.extend(group)
    args.dirs = search_paths
    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)

    stdin_fd = sys.stdin.fileno()
    interactive = os.isatty(stdin_fd)
    if interactive:
        saved_attrs = termios.tcgetattr(stdin_fd)
        quiet_attrs = termios.tcgetattr(stdin_fd)
        quiet_attrs[3] &= ~termios.ECHO  # lflags
    try:
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, quiet_attrs)
        for line in sys.stdin:
            symbolizer.write(line)
    finally:
        symbolizer.flush()
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, saved_attrs)


if __name__ == "__main__":
    main()
506