xref: /optee_os/scripts/symbolize.py (revision 24778dedd4c2ed35390d491c9f3538de7d2daf47)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import errno
10import glob
11import os
12import re
13import subprocess
14import sys
15import termios
16
# Marks the line that introduces a call stack dump
CALL_STACK_RE = re.compile('Call stack:')
# Captures the load address announced on a 'TEE load address @ 0x...' line
TEE_LOAD_ADDR_RE = re.compile(r'TEE load address @ (?P<load_addr>0x[0-9a-f]+)')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(
    r'[UEIDFM]/(TC|LD):(\?*|[0-9]*) [0-9]* +(?P<addr>0x[0-9a-f]+)')
# Captures the faulting address from '...-abort at address 0x...' lines
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One entry of the region table: virtual address, size and, when the optional
# 'flags xxxx [N]' suffix is present, the index N into the ELF list
REGION_RE = re.compile(r'region +[0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{4} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One entry of the ELF list: '[idx] <uuid> @ <load address>'
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f\-]+)')
# Marks the line that introduces a function graph dump
FUNC_GRAPH_RE = re.compile(r'Function graph')
# Any hexadecimal address appearing on a function graph line
GRAPH_ADDR_RE = re.compile(r'(?P<addr>0x[0-9a-f]+)')
# Function graph lines containing a closing brace (copied through unchanged)
GRAPH_RE = re.compile(r'}')
32
# Extended help text that argparse prints after the option list
epilog = '''
This script reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info. If the CROSS_COMPILE environment
variable is set, it is used as a prefix to the binutils tools. That is, the
script will invoke $(CROSS_COMPILE)addr2line etc. If it is not set however,
the prefix will be determined automatically for each ELF file based on its
architecture (arm-linux-gnueabihf-, aarch64-linux-gnu-). The resulting command
is then expected to be found in the user's PATH.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D

Also, this script reads function graph generated for OP-TEE user TA from
/tmp/ftrace-<ta_uuid>.out file and resolves function addresses to corresponding
symbols.

Sample usage:

  $ cat /tmp/ftrace-<ta_uuid>.out | scripts/symbolize.py -d <ta_uuid>.elf
  <paste function graph here>
  ^D
'''
77
78
def get_args():
    """Build the command-line parser and return the parsed arguments.

    The returned namespace has 'dir' (a list of lists of paths, one inner
    list per -d occurrence, or None when -d is never given) and
    'strip_path' (the current directory by default, None when -s is given
    without a value).
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed '
                'to decode a TEE Core or pseudo-TA abort, while '
                '<TA_uuid>.elf is required if a user-mode TA has '
                'crashed. For convenience, ELF files may also be '
                'given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: '
                  'current directory, use -s with no argument to show '
                  'full paths)')

    cli = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps or function graphs',
        epilog=epilog,
        # Keep the hand-formatted layout of the epilog text
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument('-d', '--dir', nargs='+', action='append',
                     help=dir_help)
    cli.add_argument('-s', '--strip_path', nargs='?', default=os.getcwd(),
                     help=strip_help)
    return cli.parse_args()
96
97
class Symbolizer(object):
    """Line filter that annotates OP-TEE dumps with symbol information.

    Lines are fed one at a time to write(). Region table and ELF list lines
    are held back, abort lines are remembered, and once a 'Call stack:' line
    is seen the saved information is printed and every following stack
    address is resolved with addr2line. Function graph dumps are handled in
    a similar way. All output goes to the 'out' object given at construction
    (it needs write() and flush()).

    Addresses and sizes are kept as hexadecimal strings ('0x...') exactly as
    they appear in the dump; they are converted to integers only where
    arithmetic is needed.
    """

    def __init__(self, out, dirs, strip_path):
        """Create a symbolizer.

        out        -- file-like object receiving the annotated output
        dirs       -- list of directories (or ELF files) to search
        strip_path -- prefix removed from file paths in the output, or a
                      false value to keep full paths
        """
        self._out = out
        self._dirs = dirs
        self._strip_path = strip_path
        self._addr2line = None
        self.reset()

    def my_Popen(self, cmd):
        """Start cmd with text-mode, line-buffered stdin/stdout pipes.

        Exits the script with an error message when the command does not
        exist. Any other OSError is propagated instead of being swallowed
        (returning None here only delayed the failure to the first use of
        the missing process object).
        """
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True,
                                    bufsize=1)
        except OSError as e:
            if e.errno == errno.ENOENT:
                print("*** Error:{}: command not found".format(cmd[0]),
                      file=sys.stderr)
                sys.exit(1)
            raise

    @staticmethod
    def _reap(proc):
        """Terminate a helper process and wait for it so it is reaped."""
        proc.terminate()
        proc.wait()

    def _stop_addr2line(self):
        """Stop the cached addr2line process, if any, and forget its ELF."""
        proc = getattr(self, '_addr2line', None)
        if proc:
            self._reap(proc)
        self._addr2line = None
        # Always clear the name together with the process, otherwise a later
        # request for the same ELF would be mistaken for a cache hit
        self._addr2line_elf_name = None

    def get_elf(self, elf_or_uuid):
        """Return the path of the ELF file for a name or UUID, or None.

        '.elf' is appended when missing. Each entry of the search list is
        tried first as the ELF file itself, then as a directory containing
        it.
        """
        if not elf_or_uuid.endswith('.elf'):
            elf_or_uuid += '.elf'
        for d in self._dirs:
            if d.endswith(elf_or_uuid) and os.path.isfile(d):
                return d
            elf = glob.glob(d + '/' + elf_or_uuid)
            if elf:
                return elf[0]
        return None

    def set_arch(self, elf):
        """Set self._arch to the binutils prefix to use for elf.

        CROSS_COMPILE wins when set to a non-empty value; otherwise the
        prefix is guessed from the first line printed by 'file'. When the
        guess fails, self._arch keeps the value read from the environment
        (None or '').
        """
        self._arch = os.getenv('CROSS_COMPILE')
        if self._arch:
            return
        p = subprocess.Popen(['file', elf], stdout=subprocess.PIPE)
        # communicate() reads everything and waits for the process to exit
        output, _ = p.communicate()
        first_line = output.split(b'\n', 1)[0]
        if b'ARM aarch64,' in first_line:
            self._arch = 'aarch64-linux-gnu-'
        elif b'ARM,' in first_line:
            self._arch = 'arm-linux-gnueabihf-'

    def arch_prefix(self, cmd, elf):
        """Return cmd with the toolchain prefix for elf, or '' if unknown."""
        self.set_arch(elf)
        if self._arch is None:
            return ''
        return self._arch + cmd

    def spawn_addr2line(self, elf_name):
        """Make sure the cached addr2line process serves elf_name.

        Does nothing when elf_name is None or already being served. On any
        failure to locate the ELF or the tool, no process is left cached.
        """
        if elf_name is None:
            return
        # Compare by value: equal names may be distinct string objects
        if self._addr2line_elf_name == elf_name:
            return
        self._stop_addr2line()
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('addr2line', elf)
        if not cmd:
            return
        self._addr2line = self.my_Popen([cmd, '-f', '-p', '-e', elf])
        self._addr2line_elf_name = elf_name

    # If addr falls into a region that maps a TA ELF file, return the load
    # address of that file.
    def elf_load_addr(self, addr):
        """Return the load address (hex string) of the ELF containing addr.

        Returns None when addr lies in a region whose ELF index is not in
        the ELF list.
        """
        if self._regions:
            i_addr = int(addr, 16)
            for r in self._regions:
                r_addr = int(r[0], 16)
                r_size = int(r[1], 16)
                if r_addr <= i_addr < r_addr + r_size:
                    # Found region
                    elf_idx = r[2]
                    if elf_idx is not None:
                        elf = self._elfs.get(int(elf_idx))
                        if elf is None:
                            return None
                        return elf[1]
        # In case address is not found in TA ELF file, fallback to tee.elf
        # especially to symbolize mixed (user-space and kernel) addresses
        # which is true when syscall ftrace is enabled along with TA
        # ftrace.
        return self._tee_load_addr

    def elf_for_addr(self, addr):
        """Return the ELF name ('tee.elf' or a UUID) for addr, or None."""
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if l_addr == self._tee_load_addr:
            return 'tee.elf'
        i_l_addr = int(l_addr, 16)
        for e in self._elfs.values():
            if int(e[1], 16) == i_l_addr:
                return e[0]
        return None

    def subtract_load_addr(self, addr):
        """Return addr relative to its ELF load address as a hex string.

        Returns None when the load address is unknown and '' when addr is
        below the load address.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if int(l_addr, 16) > int(addr, 16):
            return ''
        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))

    def resolve(self, addr):
        """Return the addr2line output for addr.

        Returns '???' when the address cannot be resolved and '!!!' when
        talking to addr2line fails.
        """
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        self.spawn_addr2line(elf_name)
        if not reladdr or not self._addr2line:
            return '???'
        if elf_name == 'tee.elf':
            # tee.elf addresses are looked up relative to its first section
            vma = self.first_vma('tee.elf')
            if vma is None:
                return '???'
            reladdr = '0x{:x}'.format(int(reladdr, 16) + int(vma, 16))
        try:
            print(reladdr, file=self._addr2line.stdin, flush=True)
            ret = self._addr2line.stdout.readline().rstrip('\n')
        except IOError:
            ret = '!!!'
        return ret

    @staticmethod
    def _find_symbol(nm_lines, ireladdr):
        """Return 'symbol' or 'symbol+offset' for ireladdr, or ''.

        nm_lines are lines of 'nm --numeric-sort --print-size' output, so
        symbols are visited in increasing address order. The offset is
        printed in decimal.
        """
        prev_addr = None
        prev_size = None
        prev_name = None
        for line in nm_lines:
            fields = line.split()
            if len(fields) == 4:
                sym_addr, size, _, name = fields
            elif len(fields) == 3:
                # Size is missing
                sym_addr, _, name = fields
                size = '0'
            else:
                # E.g., undefined (external) symbols (line = "U symbol")
                continue
            try:
                iaddr = int(sym_addr, 16)
                isize = int(size, 16)
            except ValueError:
                continue
            if iaddr == ireladdr:
                return name
            if iaddr < ireladdr and iaddr + isize >= ireladdr:
                return name + '+' + str(ireladdr - iaddr)
            if iaddr > ireladdr:
                # Went past the address. If the previous symbol has no size
                # information, assume it extends up to this one.
                if prev_name is not None and prev_size == 0:
                    return prev_name + '+' + str(ireladdr - prev_addr)
                return ''
            prev_addr = iaddr
            prev_size = isize
            prev_name = name
        return ''

    def symbol_plus_offset(self, addr):
        """Return 'symbol[+offset]' for addr using nm, or '' if unknown."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if not reladdr or elf_name is None:
            return ''
        # Locate the file before probing its architecture: the probe runs
        # 'file' on the path and cannot cope with a missing ELF
        elf = self.get_elf(elf_name)
        if not elf:
            return ''
        cmd = self.arch_prefix('nm', elf)
        if not cmd:
            return ''
        nm = self.my_Popen([cmd, '--numeric-sort', '--print-size', elf])
        try:
            return self._find_symbol(iter(nm.stdout.readline, ''),
                                     int(reladdr, 16))
        finally:
            self._reap(nm)

    @staticmethod
    def _parse_section_header(line):
        """Parse one 'objdump --section-headers' section line.

        Returns [name, vma, size] with vma and size as integers, or None
        when the line is not a section line (column titles, flag lines...).
        """
        fields = line.split()
        if len(fields) != 7:
            return None
        try:
            return [fields[1], int(fields[3], 16), int(fields[2], 16)]
        except ValueError:
            return None

    @staticmethod
    def _find_section(objdump_lines, iaddr):
        """Return 'section' or 'section+offset' for iaddr, or ''."""
        for line in objdump_lines:
            sec = Symbolizer._parse_section_header(line)
            if sec is None:
                continue
            name, ivma, isize = sec
            if ivma == iaddr:
                return name
            if ivma < iaddr and ivma + isize >= iaddr:
                return name + '+' + str(iaddr - ivma)
        return ''

    def section_plus_offset(self, addr):
        """Return 'section[+offset]' for addr using objdump, or ''."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if not reladdr or elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        if not elf:
            return ''
        cmd = self.arch_prefix('objdump', elf)
        if not cmd:
            return ''
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            return self._find_section(iter(objdump.stdout.readline, ''),
                                      int(reladdr, 16))
        finally:
            self._reap(objdump)

    def process_abort(self, line):
        """Return the abort line annotated with symbol and section info.

        Returns '' when line holds no abort address or nothing is known
        about the address.
        """
        ret = ''
        match = re.search(ABORT_ADDR_RE, line)
        if not match:
            return ret
        addr = match.group('addr')
        pre = match.start('addr')
        post = match.end('addr')
        sym = self.symbol_plus_offset(addr)
        sec = self.section_plus_offset(addr)
        if sym or sec:
            ret += line[:pre]
            ret += addr
            if sym:
                ret += ' ' + sym
            if sec:
                ret += ' ' + sec
            ret += line[post:]
        return ret

    # Record all ELF sections with the ALLOC flag
    def read_sections(self, elf_name):
        """Cache the ALLOC sections of elf_name in self._sections.

        Nothing is recorded when elf_name is None, already cached, or when
        the ELF file or objdump cannot be found.
        """
        if elf_name is None:
            return
        if elf_name in self._sections:
            return
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('objdump', elf)
        if not cmd:
            return
        sections = []
        self._sections[elf_name] = sections
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            # objdump prints each section on one line and its flags on the
            # next; remember the section until its flag line is seen
            current = None
            for line in iter(objdump.stdout.readline, ''):
                parsed = self._parse_section_header(line)
                if parsed is not None:
                    current = parsed
                elif current is not None and 'ALLOC' in line:
                    sections.append(current)
                    current = None
        finally:
            self._reap(objdump)

    def first_vma(self, elf_name):
        """Return the VMA of the first ALLOC section as a hex string.

        Returns None when no section information is available.
        """
        self.read_sections(elf_name)
        sections = self._sections.get(elf_name)
        if not sections:
            return None
        return '0x{:x}'.format(sections[0][1])

    def overlaps(self, section, addr, size):
        """Tell whether [addr, addr + size) intersects the section.

        section is a [name, addr, size] list. Empty ranges never overlap.
        """
        sec_addr = section[1]
        sec_size = section[2]
        if not size or not sec_size:
            return False
        return ((addr <= (sec_addr + sec_size - 1)) and
                ((addr + size - 1) >= sec_addr))

    def sections_in_region(self, addr, size, elf_idx):
        """Return the names of the sections mapped by a region.

        The result is a string where each name is preceded by a space, or
        '' when nothing is known about the region.
        """
        ret = ''
        addr = self.subtract_load_addr(addr)
        if not addr:
            return ''
        iaddr = int(addr, 16)
        isize = int(size, 16)
        entry = self._elfs.get(int(elf_idx))
        if entry is None or entry[0] is None:
            return ''
        elf = entry[0]
        self.read_sections(elf)
        if elf not in self._sections:
            return ''
        for s in self._sections[elf]:
            if self.overlaps(s, iaddr, isize):
                ret += ' ' + s[0]
        return ret

    def reset(self):
        """Forget everything learnt from the dump processed so far."""
        self._call_stack_found = False
        self._stop_addr2line()
        self._arch = None
        self._saved_abort_line = ''
        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
        # {idx: [uuid, load_addr], ...}; load addresses are hex strings
        self._elfs = {0: ["tee.elf", '0x0']}
        self._tee_load_addr = '0x0'
        self._func_graph_found = False
        self._func_graph_skip_line = True

    def pretty_print_path(self, path):
        """Return path with every occurrence of the strip prefix removed."""
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path

    def _write_saved_tables(self):
        """Print the region table and ELF list saved so far, annotated."""
        for r in self._regions:
            r_addr = r[0]
            r_size = r[1]
            elf_idx = r[2]
            saved_line = r[3]
            if elf_idx is None:
                self._out.write(saved_line)
            else:
                self._out.write(saved_line.strip() +
                                self.sections_in_region(r_addr,
                                                        r_size,
                                                        elf_idx) +
                                '\n')
        for e in self._elfs.values():
            if len(e) >= 3:
                # TA executable or library
                self._out.write(e[2].strip())
                elf = self.get_elf(e[0])
                if elf:
                    rpath = os.path.realpath(elf)
                    path = self.pretty_print_path(rpath)
                    self._out.write(' (' + path + ')')
                self._out.write('\n')

    def write(self, line):
        """Process one input line and write the annotated output."""
        if self._call_stack_found:
            match = re.search(STACK_ADDR_RE, line)
            if match:
                addr = match.group('addr')
                pre = match.start('addr')
                post = match.end('addr')
                self._out.write(line[:pre])
                self._out.write(addr)
                res = self.resolve(addr)
                res = self.pretty_print_path(res)
                self._out.write(' ' + res)
                self._out.write(line[post:])
                return
            else:
                # First line after the call stack: start over
                self.reset()
        if self._func_graph_found:
            match = re.search(GRAPH_ADDR_RE, line)
            match_re = re.search(GRAPH_RE, line)
            if match:
                addr = match.group('addr')
                pre = match.start('addr')
                post = match.end('addr')
                self._out.write(line[:pre])
                res = self.resolve(addr)
                # Keep only the first word of the addr2line output
                res_arr = re.split(' ', res)
                self._out.write(res_arr[0])
                self._out.write(line[post:])
                self._func_graph_skip_line = False
                return
            elif match_re:
                self._out.write(line)
                return
            elif self._func_graph_skip_line:
                return
            else:
                self.reset()
        match = re.search(REGION_RE, line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            addr = match.group('addr')
            size = match.group('size')
            elf_idx = match.group('elf_idx')
            self._regions.append([addr, size, elf_idx, line])
            return
        match = re.search(ELF_LIST_RE, line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            i = int(match.group('idx'))
            self._elfs[i] = [match.group('uuid'), match.group('load_addr'),
                             line]
            return
        match = re.search(TEE_LOAD_ADDR_RE, line)
        if match:
            self._tee_load_addr = match.group('load_addr')
        match = re.search(CALL_STACK_RE, line)
        if match:
            self._call_stack_found = True
            self._write_saved_tables()
            # Here is a good place to resolve the abort address because we
            # have all the information we need
            if self._saved_abort_line:
                self._out.write(self.process_abort(self._saved_abort_line))
        match = re.search(FUNC_GRAPH_RE, line)
        if match:
            self._func_graph_found = True
        match = re.search(ABORT_ADDR_RE, line)
        if match:
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so We can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)

    def flush(self):
        """Flush the output object."""
        self._out.flush()
480
481
def main():
    """Symbolize everything read from stdin and print it to stdout.

    When stdin is a terminal, echo is switched off while reading so that a
    pasted dump is not shown twice; the previous terminal settings are
    restored on exit.
    """
    args = get_args()
    # -d may be repeated *and* take several values each time, which yields a
    # list of lists: merge it into one flat list
    search_paths = []
    for group in args.dir or []:
        search_paths.extend(group)
    args.dirs = search_paths
    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)

    stdin_fd = sys.stdin.fileno()
    interactive = os.isatty(stdin_fd)
    saved_attrs = None
    quiet_attrs = None
    if interactive:
        saved_attrs = termios.tcgetattr(stdin_fd)
        quiet_attrs = termios.tcgetattr(stdin_fd)
        # Index 3 holds the local mode flags (lflags)
        quiet_attrs[3] = quiet_attrs[3] & ~termios.ECHO
    try:
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, quiet_attrs)
        for text in sys.stdin:
            symbolizer.write(text)
    finally:
        symbolizer.flush()
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, saved_attrs)
507
508
# Run main() only when executed as a script, not when imported
if __name__ == "__main__":
    main()
511