xref: /optee_os/scripts/symbolize.py (revision bbaeed4dc6258006e846543197b8aff95d80abbf)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import errno
10import glob
11import os
12import re
13import subprocess
14import sys
15
# Marks the line that introduces a call stack dump.
CALL_STACK_RE = re.compile('Call stack:')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(
    r'[UEIDFM]/(TC|LD):(\?*|[0-9]*) [0-9]* +(?P<addr>0x[0-9a-f]+)')
# This gets the faulting address from lines looking like this:
# E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One entry of a region table:
#   region <n>: va <addr> pa <hex> size <size>
# optionally followed by ' flags ' plus four characters and, optionally
# again, by an ELF index in square brackets (captured as 'elf_idx').
REGION_RE = re.compile(r'region +[0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{4} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One entry of an ELF list: '[<idx>] <uuid> @ <load_addr>'
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f\-]+)')
# Marks the line that introduces a function graph.
FUNC_GRAPH_RE = re.compile(r'Function graph')
# Any hexadecimal address; applied to lines once a function graph has
# been detected.
GRAPH_ADDR_RE = re.compile(r'(?P<addr>0x[0-9a-f]+)')
# A closing brace; function graph lines containing one (and no address)
# are passed through unchanged.
GRAPH_RE = re.compile(r'}')

# Free-form text handed to argparse as the epilog of the --help output
# (see get_args()).
epilog = '''
This scripts reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info. If the CROSS_COMPILE environment
variable is set, it is used as a prefix to the binutils tools. That is, the
script will invoke $(CROSS_COMPILE)addr2line etc. If it is not set however,
the prefix will be determined automatically for each ELF file based on its
architecture (arm-linux-gnueabihf-, aarch64-linux-gnu-). The resulting command
is then expected to be found in the user's PATH.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D

Also, this script reads function graph generated for OP-TEE user TA from
/tmp/ftrace-<ta_uuid>.out file and resolves function addresses to corresponding
symbols.

Sample usage:

  $ cat /tmp/ftrace-<ta_uuid>.out | scripts/symbolize.py -d <ta_uuid>.elf
  <paste function graph here>
  ^D
'''
75
76
def get_args():
    """Parse the command line and return the argparse namespace.

    The namespace carries:
      dir         None when -d was never given, otherwise one list of
                  paths per -d occurrence (each -d accepts one or more
                  values).
      strip_path  the prefix to remove from printed file paths; defaults
                  to the current working directory and is None when -s is
                  given without a value.
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed '
                'to decode a TEE Core or pseudo-TA abort, while '
                '<TA_uuid>.elf is required if a user-mode TA has '
                'crashed. For convenience, ELF files may also be '
                'given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: '
                  'current directory, use -s with no argument to show '
                  'full paths)')

    # RawDescriptionHelpFormatter keeps the line breaks of the epilog.
    cli = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps or function graphs',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument('-d', '--dir', nargs='+', action='append',
                     help=dir_help)
    cli.add_argument('-s', '--strip_path', nargs='?', default=os.getcwd(),
                     help=strip_help)

    return cli.parse_args()
94
95
class Symbolizer(object):
    """File-like filter that adds debug information to OP-TEE dumps.

    Log lines are fed one at a time to write(). Region table and ELF list
    lines are held back until a 'Call stack:' line is seen; they are then
    written out with section names and ELF paths appended, followed by the
    annotated abort line. Every call stack address that follows gets the
    output of addr2line appended. Function graph lines get their address
    replaced by the first word of the addr2line output.

    out         object with write() and flush() receiving the result
    dirs        list of directories (or ELF files) searched for ELF files
    strip_path  prefix removed from file paths in the output, or a false
                value to print paths unchanged
    """

    def __init__(self, out, dirs, strip_path):
        self._out = out
        self._dirs = dirs
        self._strip_path = strip_path
        # Must exist before reset() runs, since reset() inspects it
        self._addr2line = None
        self.reset()

    def my_Popen(self, cmd):
        """Start cmd with line-buffered text pipes on stdin and stdout.

        Exits the script with an error message if the command does not
        exist. Any other OSError is propagated to the caller.
        """
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE, text=True,
                                    bufsize=1)
        except OSError as e:
            if e.errno == errno.ENOENT:
                print("*** Error:{}: command not found".format(cmd[0]),
                      file=sys.stderr)
                sys.exit(1)
            # Do not hide other failures behind a None return value
            raise

    @staticmethod
    def _reap(proc):
        """Stop a helper process, close its pipes and wait for it."""
        if proc is None:
            return
        proc.terminate()
        for pipe in (proc.stdin, proc.stdout):
            if pipe is None:
                continue
            try:
                pipe.close()
            except OSError:
                # Best-effort cleanup: flushing a pipe whose reader is
                # already gone may fail, which is of no consequence here
                pass
        proc.wait()

    @staticmethod
    def _parse_section_header(line):
        """Parse one section line of 'objdump --section-headers'.

        Returns (name, vma, size) with vma and size as integers, or None
        if the line is not made of seven fields with hexadecimal size and
        VMA (column titles, flags lines, blank lines...).
        """
        fields = line.split()
        if len(fields) != 7:
            return None
        try:
            return fields[1], int(fields[3], 16), int(fields[2], 16)
        except ValueError:
            return None

    def get_elf(self, elf_or_uuid):
        """Return the path of the ELF file for a name or UUID, or None.

        '.elf' is appended to elf_or_uuid when missing. Each entry of the
        search list is either the file itself or a directory in which the
        file is looked up.
        """
        if not elf_or_uuid.endswith('.elf'):
            elf_or_uuid += '.elf'
        for d in self._dirs:
            if d.endswith(elf_or_uuid) and os.path.isfile(d):
                return d
            elf = glob.glob(d + '/' + elf_or_uuid)
            if elf:
                return elf[0]
        return None

    def set_arch(self):
        """Determine the binutils prefix if it is not known yet.

        CROSS_COMPILE is used when set to a non-empty value. Otherwise
        the first line printed by 'file' for the ELF with index 0 selects
        the prefix.
        """
        if self._arch:
            return
        self._arch = os.getenv('CROSS_COMPILE')
        if self._arch:
            return
        elf = self.get_elf(self._elfs[0][0])
        if elf is None:
            return
        p = subprocess.Popen(['file', elf], stdout=subprocess.PIPE)
        # communicate() reads everything and collects the exit status
        output, _ = p.communicate()
        lines = output.splitlines()
        first_line = lines[0] if lines else b''
        if b'ARM aarch64,' in first_line:
            self._arch = 'aarch64-linux-gnu-'
        elif b'ARM,' in first_line:
            self._arch = 'arm-linux-gnueabihf-'

    def arch_prefix(self, cmd):
        """Return cmd with the toolchain prefix, or '' if none is known."""
        self.set_arch()
        if self._arch is None:
            return ''
        return self._arch + cmd

    def spawn_addr2line(self, elf_name):
        """Make sure the running addr2line process serves elf_name.

        Nothing happens when elf_name is None or is already being served.
        Otherwise the current process (if any) is stopped and a new one is
        started, provided the ELF file and the tool prefix can be found.
        """
        if elf_name is None:
            return
        if self._addr2line_elf_name == elf_name:
            return
        if self._addr2line:
            self._reap(self._addr2line)
            self._addr2line = None
        # Forget the old name so that a failed spawn below does not leave
        # a name behind that no process is serving
        self._addr2line_elf_name = None
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('addr2line')
        if not cmd:
            return
        self._addr2line = self.my_Popen([cmd, '-f', '-p', '-e', elf])
        self._addr2line_elf_name = elf_name

    # If addr falls into a region that maps a TA ELF file, return the load
    # address of that file.
    def elf_load_addr(self, addr):
        """Return the load address (hex string) of the ELF mapping addr.

        Without a region table every address is attributed to tee.elf
        ('0x0'). With a region table, None is returned when no region
        with a known ELF index contains addr.
        """
        if not self._regions:
            # tee.elf
            return '0x0'
        i_addr = int(addr, 16)
        for r in self._regions:
            r_addr = int(r[0], 16)
            r_size = int(r[1], 16)
            if r_addr <= i_addr < r_addr + r_size:
                # Found region
                elf_idx = r[2]
                if elf_idx is None:
                    continue
                entry = self._elfs.get(int(elf_idx))
                if entry is not None:
                    return entry[1]
        return None

    def elf_for_addr(self, addr):
        """Return the ELF name (or UUID) owning addr, or None."""
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if l_addr == '0x0':
            return 'tee.elf'
        i_l_addr = int(l_addr, 16)
        for e in self._elfs.values():
            if int(e[1], 16) == i_l_addr:
                return e[0]
        return None

    def subtract_load_addr(self, addr):
        """Return addr relative to the load address of its ELF file.

        The result is a hex string, None when no ELF maps addr, or '' when
        the load address is above addr.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if int(l_addr, 16) > int(addr, 16):
            return ''
        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))

    def resolve(self, addr):
        """Return the addr2line output for addr.

        '???' is returned when the address cannot be attributed to an
        available ELF file, '!!!' when talking to addr2line fails.
        """
        reladdr = self.subtract_load_addr(addr)
        self.spawn_addr2line(self.elf_for_addr(addr))
        if not reladdr or not self._addr2line:
            return '???'
        try:
            print(reladdr, file=self._addr2line.stdin)
            ret = self._addr2line.stdout.readline().rstrip('\n')
        except IOError:
            ret = '!!!'
        return ret

    def symbol_plus_offset(self, addr):
        """Return '<symbol>' or '<symbol>+<decimal offset>' for addr.

        The symbol table is read with 'nm --numeric-sort --print-size'.
        When addr is not inside any sized symbol but the closest symbol
        below it has no size, that symbol is used. '' is returned when
        nothing matches or the ELF file or tools are unavailable.
        """
        ret = ''
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('nm')
        if not reladdr or not elf or not cmd:
            return ''
        ireladdr = int(reladdr, 16)
        prev = None  # (addr, size, name) of the last symbol below ireladdr
        nm = self.my_Popen([cmd, '--numeric-sort', '--print-size', elf])
        try:
            for line in nm.stdout:
                fields = line.split()
                if len(fields) == 4:
                    sym_addr, sym_size, _, name = fields
                elif len(fields) == 3:
                    # Size is missing
                    sym_addr, _, name = fields
                    sym_size = '0'
                else:
                    # E.g., undefined (external) symbols
                    # (line = "U symbol")
                    continue
                try:
                    iaddr = int(sym_addr, 16)
                    isize = int(sym_size, 16)
                except ValueError:
                    continue
                if iaddr == ireladdr:
                    ret = name
                    break
                # A symbol covers [iaddr, iaddr + isize): the end address
                # itself belongs to whatever comes next
                if iaddr < ireladdr < iaddr + isize:
                    ret = name + '+' + str(ireladdr - iaddr)
                    break
                if iaddr > ireladdr:
                    # Symbols are sorted, so nothing further can match.
                    # Fall back to the preceding symbol if its extent is
                    # unknown.
                    if prev is not None and prev[1] == 0:
                        ret = prev[2] + '+' + str(ireladdr - prev[0])
                    break
                prev = (iaddr, isize, name)
        finally:
            self._reap(nm)
        return ret

    def section_plus_offset(self, addr):
        """Return '<section>' or '<section>+<decimal offset>' for addr.

        Sections are read with 'objdump --section-headers'. '' is returned
        when no section contains the address or the ELF file or tools are
        unavailable.
        """
        ret = ''
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not reladdr or not elf or not cmd:
            return ''
        iaddr = int(reladdr, 16)
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            for line in objdump.stdout:
                header = self._parse_section_header(line)
                if header is None:
                    continue
                name, ivma, isize = header
                if ivma == iaddr:
                    ret = name
                    break
                # A section covers [ivma, ivma + isize)
                if ivma < iaddr < ivma + isize:
                    ret = name + '+' + str(iaddr - ivma)
                    break
        finally:
            self._reap(objdump)
        return ret

    def process_abort(self, line):
        """Return the abort line with symbol and section info added.

        The information is inserted right after the abort address. '' is
        returned when neither a symbol nor a section could be found.
        """
        match = ABORT_ADDR_RE.search(line)
        if not match:
            return ''
        addr = match.group('addr')
        sym = self.symbol_plus_offset(addr)
        sec = self.section_plus_offset(addr)
        if not sym and not sec:
            return ''
        parts = [line[:match.start('addr')], addr]
        if sym:
            parts.append(' ' + sym)
        if sec:
            parts.append(' ' + sec)
        parts.append(line[match.end('addr'):])
        return ''.join(parts)

    # Return all ELF sections with the ALLOC flag
    def read_sections(self, elf_name):
        """Cache the ALLOC sections of elf_name in self._sections.

        In the objdump output the flags of a section are printed on the
        line following its header, hence the header is remembered until
        the flags line is seen.
        """
        if elf_name is None:
            return
        if elf_name in self._sections:
            return
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not elf or not cmd:
            return
        self._sections[elf_name] = []
        last_header = None
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            for line in objdump.stdout:
                header = self._parse_section_header(line)
                if header is not None:
                    last_header = header
                elif 'ALLOC' in line and last_header is not None:
                    self._sections[elf_name].append(list(last_header))
        finally:
            self._reap(objdump)

    def overlaps(self, section, addr, size):
        """Tell whether [addr, addr + size) intersects the section.

        section is a [name, addr, size] list. Empty ranges never overlap.
        """
        sec_addr = section[1]
        sec_size = section[2]
        if not size or not sec_size:
            return False
        return ((addr <= (sec_addr + sec_size - 1)) and
                ((addr + size - 1) >= sec_addr))

    def sections_in_region(self, addr, size, elf_idx):
        """Return the names of the ELF sections found in a region.

        addr and size are hex strings, elf_idx is the index of the ELF
        file in the ELF list. Each name is preceded by a space. '' is
        returned when the ELF file is unknown or cannot be read.
        """
        ret = ''
        addr = self.subtract_load_addr(addr)
        if not addr:
            return ''
        iaddr = int(addr, 16)
        isize = int(size, 16)
        entry = self._elfs.get(int(elf_idx))
        if entry is None or entry[0] is None:
            return ''
        elf = entry[0]
        self.read_sections(elf)
        if elf not in self._sections:
            return ''
        for s in self._sections[elf]:
            if self.overlaps(s, iaddr, isize):
                ret += ' ' + s[0]
        return ret

    def reset(self):
        """Forget everything learnt from the dump processed so far."""
        self._call_stack_found = False
        if self._addr2line:
            self._reap(self._addr2line)
            self._addr2line = None
        self._addr2line_elf_name = None
        self._arch = None
        self._saved_abort_line = ''
        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
        # {idx: [uuid, load_addr], ...}; load addresses are hex strings,
        # like the ones taken from the ELF list
        self._elfs = {0: ["tee.elf", '0x0']}
        self._func_graph_found = False
        self._func_graph_skip_line = True

    def pretty_print_path(self, path):
        """Remove every occurrence of the strip path from a string."""
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path

    def _write_saved_tables(self):
        """Write the saved region table, ELF list and abort line."""
        for r_addr, r_size, elf_idx, saved_line in self._regions:
            if elf_idx is None:
                self._out.write(saved_line)
            else:
                self._out.write(saved_line.strip() +
                                self.sections_in_region(r_addr, r_size,
                                                        elf_idx) +
                                '\n')
        for e in self._elfs.values():
            if len(e) < 3:
                # Default tee.elf entry: no saved line to print
                continue
            # TA executable or library
            self._out.write(e[2].strip())
            elf = self.get_elf(e[0])
            if elf:
                rpath = os.path.realpath(elf)
                path = self.pretty_print_path(rpath)
                self._out.write(' (' + path + ')')
            self._out.write('\n')
        # Here is a good place to resolve the abort address because we
        # have all the information we need
        if self._saved_abort_line:
            self._out.write(self.process_abort(self._saved_abort_line))

    def write(self, line):
        """Process one line of input and write the result to the output."""
        if self._call_stack_found:
            match = STACK_ADDR_RE.search(line)
            if match:
                addr = match.group('addr')
                res = self.pretty_print_path(self.resolve(addr))
                self._out.write(line[:match.start('addr')] + addr +
                                ' ' + res + line[match.end('addr'):])
                return
            # First line after the call stack: start afresh
            self.reset()
        if self._func_graph_found:
            match = GRAPH_ADDR_RE.search(line)
            if match:
                res = self.resolve(match.group('addr'))
                # Keep the first word of the addr2line output only
                self._out.write(line[:match.start('addr')] +
                                res.split(' ')[0] +
                                line[match.end('addr'):])
                self._func_graph_skip_line = False
                return
            if GRAPH_RE.search(line):
                self._out.write(line)
                return
            if self._func_graph_skip_line:
                return
            self.reset()
        match = REGION_RE.search(line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            self._regions.append([match.group('addr'),
                                  match.group('size'),
                                  match.group('elf_idx'), line])
            return
        match = ELF_LIST_RE.search(line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            i = int(match.group('idx'))
            self._elfs[i] = [match.group('uuid'), match.group('load_addr'),
                             line]
            return
        if CALL_STACK_RE.search(line):
            self._call_stack_found = True
            self._write_saved_tables()
        if FUNC_GRAPH_RE.search(line):
            self._func_graph_found = True
        if ABORT_ADDR_RE.search(line):
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so we can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)

    def flush(self):
        """Flush the output object."""
        self._out.flush()
470
471
def main():
    """Symbolize the dump read from stdin and print it to stdout."""
    args = get_args()
    # Flatten list in case -d is used several times *and* with multiple
    # arguments
    search_paths = []
    for group in args.dir or []:
        search_paths.extend(group)
    args.dirs = search_paths

    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)
    for line in sys.stdin:
        symbolizer.write(line)
    symbolizer.flush()


if __name__ == "__main__":
    main()
489