xref: /optee_os/scripts/symbolize.py (revision 20d152b8d86d84259f382aef3a0d10d489bf17cb)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import errno
10import glob
11import os
12import re
13import subprocess
14import sys
15import termios
16
# Line announcing that call stack addresses follow
CALL_STACK_RE = re.compile(r'Call stack:')
# Captures the address of a call stack entry, i.e., a line looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(r'[UEIDFM]/(TC|LD):(\?*|[0-9]*) [0-9]* +'
                           r'(?P<addr>0x[0-9a-f]+)')
# Captures the faulting address of a '...-abort at address 0x...' line
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# Captures the virtual address, the size and (when present) the bracketed ELF
# index of one entry of the region table
REGION_RE = re.compile(r'region +[0-9]+: '
                       r'va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ '
                       r'size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{4} (\[(?P<elf_idx>[0-9]+)\])?)?')
# Captures the index, UUID and load address of one entry of the ELF list
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] '
                         r'(?P<uuid>[0-9a-f\-]+) @ '
                         r'(?P<load_addr>0x[0-9a-f\-]+)')
# Line announcing that a function graph follows
FUNC_GRAPH_RE = re.compile(r'Function graph')
# Captures the first hexadecimal address found on a function graph line
GRAPH_ADDR_RE = re.compile(r'(?P<addr>0x[0-9a-f]+)')
# Matches a function graph line holding a closing brace
GRAPH_RE = re.compile(r'}')
31
# Free-form text appended to the --help output (passed to argparse as the
# parser epilog and printed verbatim).
epilog = '''
This script reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info. If the CROSS_COMPILE environment
variable is set, it is used as a prefix to the binutils tools. That is, the
script will invoke $(CROSS_COMPILE)addr2line etc. If it is not set however,
the prefix will be determined automatically for each ELF file based on its
architecture (arm-linux-gnueabihf-, aarch64-linux-gnu-). The resulting command
is then expected to be found in the user's PATH.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D

Also, this script reads function graph generated for OP-TEE user TA from
/tmp/ftrace-<ta_uuid>.out file and resolves function addresses to corresponding
symbols.

Sample usage:

  $ cat /tmp/ftrace-<ta_uuid>.out | scripts/symbolize.py -d <ta_uuid>.elf
  <paste function graph here>
  ^D
'''
76
77
def get_args():
    """Parse the command line and return the resulting argparse namespace.

    Two options are defined: -d/--dir, which may be repeated and takes one
    or more paths each time, and -s/--strip_path, whose value defaults to
    the current working directory.
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed '
                'to decode a TEE Core or pseudo-TA abort, while '
                '<TA_uuid>.elf is required if a user-mode TA has '
                'crashed. For convenience, ELF files may also be '
                'given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: '
                  'current directory, use -s with no argument to show '
                  'full paths)')

    # The epilog is pre-formatted text, hence the raw description formatter
    cli = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps or function graphs',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument('-d', '--dir', nargs='+', action='append',
                     help=dir_help)
    cli.add_argument('-s', '--strip_path', nargs='?', default=os.getcwd(),
                     help=strip_help)

    namespace = cli.parse_args()
    return namespace
95
96
class Symbolizer(object):
    """Line filter that adds debug information to OP-TEE dumps.

    Lines are fed one at a time to write(). Region table and ELF list lines
    are held back until a 'Call stack:' line is seen, at which point they are
    written out annotated with section names and ELF paths. Call stack and
    function graph addresses are resolved through a long-running addr2line
    process; abort addresses are resolved with nm and objdump.
    """

    def __init__(self, out, dirs, strip_path):
        """Create a symbolizer.

        out is the stream that receives the processed text, dirs is the list
        of directories (or ELF files) searched by get_elf() and strip_path is
        the prefix removed from file paths by pretty_print_path() (a false
        value disables stripping).
        """
        self._out = out
        self._dirs = dirs
        self._strip_path = strip_path
        self._addr2line = None
        self.reset()

    def my_Popen(self, cmd):
        """Start cmd with text-mode, line-buffered stdin/stdout pipes.

        Exits the script if the command cannot be found. Any other OS error
        is reported on stderr and None is returned, so callers must be
        prepared for a missing process.
        """
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE, text=True,
                                    bufsize=1)
        except OSError as e:
            if e.errno == errno.ENOENT:
                print("*** Error:{}: command not found".format(cmd[0]),
                      file=sys.stderr)
                sys.exit(1)
            print("*** Error:{}: {}".format(cmd[0], e.strerror),
                  file=sys.stderr)
            return None

    def get_elf(self, elf_or_uuid):
        """Return the path of the ELF file for elf_or_uuid, or None.

        '.elf' is appended to the name when missing. Each entry of the search
        list is accepted either when it is itself a file whose path ends with
        that name, or when it is a directory containing it.
        """
        if not elf_or_uuid.endswith('.elf'):
            elf_or_uuid += '.elf'
        for d in self._dirs:
            if d.endswith(elf_or_uuid) and os.path.isfile(d):
                return d
            elf = glob.glob(d + '/' + elf_or_uuid)
            if elf:
                return elf[0]
        return None

    def set_arch(self):
        """Determine the binutils prefix and cache it in self._arch.

        CROSS_COMPILE is used when set to a non-empty value. Otherwise the
        'file' utility is run on the first ELF of the list and the prefix is
        derived from the architecture it reports. self._arch is left
        untouched when nothing can be determined.
        """
        if self._arch:
            return
        self._arch = os.getenv('CROSS_COMPILE')
        if self._arch:
            return
        elf = self.get_elf(self._elfs[0][0])
        if elf is None:
            return
        p = subprocess.Popen(['file', elf], stdout=subprocess.PIPE)
        # communicate() reads everything and reaps the child process
        output, _ = p.communicate()
        first_line = output.split(b'\n', 1)[0]
        if b'ARM aarch64,' in first_line:
            self._arch = 'aarch64-linux-gnu-'
        elif b'ARM,' in first_line:
            self._arch = 'arm-linux-gnueabihf-'

    def arch_prefix(self, cmd):
        """Return cmd with the toolchain prefix, or '' if it is unknown."""
        self.set_arch()
        if self._arch is None:
            return ''
        return self._arch + cmd

    def _stop_addr2line(self):
        """Terminate the running addr2line process, if any."""
        if self._addr2line:
            self._addr2line.terminate()
            self._addr2line.wait()
            self._addr2line = None
        # The cached name is only meaningful while a process is running
        self._addr2line_elf_name = None

    def spawn_addr2line(self, elf_name):
        """Make sure an addr2line process is running for elf_name.

        Nothing is done when elf_name is None or when the running process
        already serves that ELF. Otherwise the previous process is stopped
        and a new one is started, provided the ELF file and the tool prefix
        can be found.
        """
        if elf_name is None:
            return
        if self._addr2line and self._addr2line_elf_name == elf_name:
            return
        self._stop_addr2line()
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('addr2line')
        if not cmd:
            return
        self._addr2line = self.my_Popen([cmd, '-f', '-p', '-e', elf])
        if self._addr2line:
            self._addr2line_elf_name = elf_name

    def elf_load_addr(self, addr):
        """Return the load address (hex string) of the ELF mapped at addr.

        Without a region table every address is assumed to belong to
        tee.elf and '0x0' is returned. With a region table, None is returned
        when addr is not inside a region that references a known ELF.
        """
        if not self._regions:
            # tee.elf
            return '0x0'
        i_addr = int(addr, 16)
        for r in self._regions:
            r_addr = int(r[0], 16)
            r_size = int(r[1], 16)
            if r_addr <= i_addr < r_addr + r_size:
                # Found region
                elf_idx = r[2]
                if elf_idx is not None:
                    elf = self._elfs.get(int(elf_idx))
                    if elf is not None:
                        return elf[1]
        return None

    def elf_for_addr(self, addr):
        """Return the name (or UUID) of the ELF mapped at addr, or None."""
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if l_addr == '0x0':
            return 'tee.elf'
        i_load = int(l_addr, 16)
        for e in self._elfs.values():
            if int(e[1], 16) == i_load:
                return e[0]
        return None

    def subtract_load_addr(self, addr):
        """Convert addr into an offset from the start of its ELF.

        Returns a hex string, None when the ELF is unknown, or '' when the
        load address is above addr.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if int(l_addr, 16) > int(addr, 16):
            return ''
        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))

    def resolve(self, addr):
        """Return the addr2line output for addr.

        '???' is returned when the address cannot be attributed to an ELF
        file or addr2line is not available, '!!!' when talking to addr2line
        fails.
        """
        reladdr = self.subtract_load_addr(addr)
        self.spawn_addr2line(self.elf_for_addr(addr))
        if not reladdr or not self._addr2line:
            return '???'
        try:
            print(reladdr, file=self._addr2line.stdin)
            self._addr2line.stdin.flush()
            ret = self._addr2line.stdout.readline().rstrip('\n')
        except IOError:
            ret = '!!!'
        return ret

    def symbol_plus_offset(self, addr):
        """Return 'symbol' or 'symbol+offset' for addr, or '' if unknown.

        The symbol table is read with 'nm --numeric-sort --print-size'. The
        offset is printed in decimal.
        """
        ret = ''
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('nm')
        if not reladdr or not elf or not cmd:
            return ''
        ireladdr = int(reladdr, 16)
        nm = self.my_Popen([cmd, '--numeric-sort', '--print-size', elf])
        if nm is None:
            return ''
        # Last symbol seen so far
        prev_name = None
        prev_addr = 0
        prev_size = 0
        for line in iter(nm.stdout.readline, ''):
            fields = line.split()
            if len(fields) == 4:
                sym_addr, size, _, name = fields
            elif len(fields) == 3:
                # Size is missing
                sym_addr, _, name = fields
                size = '0'
            else:
                # E.g., undefined (external) symbols (line = "U symbol")
                continue
            try:
                iaddr = int(sym_addr, 16)
                isize = int(size, 16)
            except ValueError:
                continue
            if iaddr == ireladdr:
                ret = name
                break
            # The end bound is inclusive: an address one past the end of a
            # symbol is still attributed to it
            if iaddr < ireladdr and iaddr + isize >= ireladdr:
                ret = name + '+' + str(ireladdr - iaddr)
                break
            if iaddr > ireladdr:
                # Symbols are sorted by address, so none of the following
                # ones can match. If the previous symbol has no size
                # information, assume it extends up to this one.
                if prev_name is not None and prev_size == 0:
                    ret = prev_name + '+' + str(ireladdr - prev_addr)
                break
            prev_name = name
            prev_addr = iaddr
            prev_size = isize
        nm.terminate()
        nm.wait()
        return ret

    def section_plus_offset(self, addr):
        """Return 'section' or 'section+offset' for addr, or '' if unknown.

        The section table is read with 'objdump --section-headers'. The
        offset is printed in decimal.
        """
        ret = ''
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not reladdr or not elf or not cmd:
            return ''
        iaddr = int(reladdr, 16)
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        if objdump is None:
            return ''
        for line in iter(objdump.stdout.readline, ''):
            # Section lines are: idx, name, size, vma, lma, offs, algn
            fields = line.split()
            if len(fields) != 7:
                continue
            name, size, vma = fields[1:4]
            try:
                ivma = int(vma, 16)
                isize = int(size, 16)
            except ValueError:
                continue
            if ivma == iaddr:
                ret = name
                break
            if ivma < iaddr and ivma + isize >= iaddr:
                ret = name + '+' + str(iaddr - ivma)
                break
        objdump.terminate()
        objdump.wait()
        return ret

    def process_abort(self, line):
        """Return the abort line with symbol/section info after the address.

        An empty string is returned when neither a symbol nor a section
        could be found for the abort address.
        """
        ret = ''
        match = ABORT_ADDR_RE.search(line)
        addr = match.group('addr')
        pre = match.start('addr')
        post = match.end('addr')
        sym = self.symbol_plus_offset(addr)
        sec = self.section_plus_offset(addr)
        if sym or sec:
            ret += line[:pre]
            ret += addr
            if sym:
                ret += ' ' + sym
            if sec:
                ret += ' ' + sec
            ret += line[post:]
        return ret

    def read_sections(self, elf_name):
        """Cache all the sections of elf_name that have the ALLOC flag.

        The result is stored in self._sections[elf_name] as a list of
        [name, vma, size]. Nothing is stored when the ELF file or the
        objdump prefix cannot be found.
        """
        if elf_name is None:
            return
        if elf_name in self._sections:
            return
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not elf or not cmd:
            return
        self._sections[elf_name] = []
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        if objdump is None:
            return
        # Section parsed from the last header line, waiting for its flags
        current = None
        for line in iter(objdump.stdout.readline, ''):
            fields = line.split()
            if len(fields) == 7 and fields[0].isdigit():
                # Header line: idx, name, size, vma, lma, offs, algn
                try:
                    current = [fields[1], int(fields[3], 16),
                               int(fields[2], 16)]
                except ValueError:
                    current = None
            elif 'ALLOC' in line and current is not None:
                # Flags line that follows the header line
                self._sections[elf_name].append(current)
                current = None
        objdump.wait()

    def overlaps(self, section, addr, size):
        """Tell if [addr, addr + size) intersects section.

        section is a [name, addr, size] list. Empty ranges never overlap.
        """
        sec_addr = section[1]
        sec_size = section[2]
        if not size or not sec_size:
            return False
        return ((addr <= (sec_addr + sec_size - 1)) and
                ((addr + size - 1) >= sec_addr))

    def sections_in_region(self, addr, size, elf_idx):
        """Return the names of the ELF sections found in a memory region.

        addr and size are hex strings describing the region, elf_idx is the
        index of the ELF mapped there. Each section name is preceded by a
        space; '' is returned when nothing can be determined.
        """
        addr = self.subtract_load_addr(addr)
        if not addr:
            return ''
        iaddr = int(addr, 16)
        isize = int(size, 16)
        entry = self._elfs.get(int(elf_idx))
        if entry is None or entry[0] is None:
            return ''
        elf = entry[0]
        self.read_sections(elf)
        if elf not in self._sections:
            return ''
        return ''.join(' ' + s[0] for s in self._sections[elf]
                       if self.overlaps(s, iaddr, isize))

    def reset(self):
        """Forget everything learned from the dump processed so far."""
        self._call_stack_found = False
        self._stop_addr2line()
        self._arch = None
        self._saved_abort_line = ''
        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
        # Load addresses are kept as hex strings, like the ones parsed from
        # the ELF list
        self._elfs = {0: ["tee.elf", "0x0"]}  # {idx: [uuid, load_addr], ...}
        self._func_graph_found = False
        self._func_graph_skip_line = True

    def pretty_print_path(self, path):
        """Return path with the strip prefix (and following '/') removed."""
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path

    def _write_saved_context(self):
        """Write the saved region table, ELF list and abort line."""
        for r_addr, r_size, elf_idx, saved_line in self._regions:
            if elf_idx is None:
                self._out.write(saved_line)
            else:
                self._out.write(saved_line.strip() +
                                self.sections_in_region(r_addr, r_size,
                                                        elf_idx) +
                                '\n')
        for e in self._elfs.values():
            if len(e) >= 3:
                # TA executable or library
                self._out.write(e[2].strip())
                elf = self.get_elf(e[0])
                if elf:
                    rpath = os.path.realpath(elf)
                    path = self.pretty_print_path(rpath)
                    self._out.write(' (' + path + ')')
                self._out.write('\n')
        # Here is a good place to resolve the abort address because we
        # have all the information we need
        if self._saved_abort_line:
            self._out.write(self.process_abort(self._saved_abort_line))

    def write(self, line):
        """Process one line of input and write the result to the output."""
        if self._call_stack_found:
            match = STACK_ADDR_RE.search(line)
            if match:
                addr = match.group('addr')
                pre = match.start('addr')
                post = match.end('addr')
                self._out.write(line[:pre])
                self._out.write(addr)
                res = self.resolve(addr)
                res = self.pretty_print_path(res)
                self._out.write(' ' + res)
                self._out.write(line[post:])
                return
            # First line after the call stack: start over
            self.reset()
        if self._func_graph_found:
            match = GRAPH_ADDR_RE.search(line)
            if match:
                addr = match.group('addr')
                pre = match.start('addr')
                post = match.end('addr')
                self._out.write(line[:pre])
                res = self.resolve(addr)
                # Only keep the first word of the addr2line output
                res_arr = re.split(' ', res)
                self._out.write(res_arr[0])
                self._out.write(line[post:])
                self._func_graph_skip_line = False
                return
            if GRAPH_RE.search(line):
                self._out.write(line)
                return
            if self._func_graph_skip_line:
                # Nothing was resolved yet: drop the line
                return
            self.reset()
        match = REGION_RE.search(line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            self._regions.append([match.group('addr'), match.group('size'),
                                  match.group('elf_idx'), line])
            return
        match = ELF_LIST_RE.search(line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            i = int(match.group('idx'))
            self._elfs[i] = [match.group('uuid'), match.group('load_addr'),
                             line]
            return
        if CALL_STACK_RE.search(line):
            self._call_stack_found = True
            self._write_saved_context()
        if FUNC_GRAPH_RE.search(line):
            self._func_graph_found = True
        if ABORT_ADDR_RE.search(line):
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so we can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)

    def flush(self):
        """Flush the output stream."""
        self._out.flush()
471
472
def main():
    """Symbolize everything read from stdin and write it to stdout.

    When stdin is a terminal, echo is turned off while the input is being
    read and the original terminal settings are restored at the end.
    """
    args = get_args()
    # -d may be used several times *and* with multiple arguments, so args.dir
    # is a list of lists (or None when the option is absent): flatten it
    search_paths = []
    for group in args.dir or []:
        search_paths.extend(group)
    args.dirs = search_paths
    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)

    stdin_fd = sys.stdin.fileno()
    interactive = os.isatty(stdin_fd)
    if interactive:
        saved_attrs = termios.tcgetattr(stdin_fd)
        silent_attrs = termios.tcgetattr(stdin_fd)
        silent_attrs[3] &= ~termios.ECHO  # lflags
    try:
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, silent_attrs)
        for line in sys.stdin:
            symbolizer.write(line)
    finally:
        symbolizer.flush()
        if interactive:
            termios.tcsetattr(stdin_fd, termios.TCSADRAIN, saved_attrs)


if __name__ == '__main__':
    main()
502