xref: /optee_os/scripts/symbolize.py (revision 8a6d4a8b6a5e21aaf15798af46dbf97c57420e7f)
1#!/usr/bin/env python
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
import argparse
import errno
import glob
import os
import re
import subprocess
import sys
14
# Marks the beginning of a call stack dump.
CALL_STACK_RE = re.compile(r'Call stack:')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(r'[UEIDFM]/T[AC]:.*(?P<addr>0x[0-9a-f]+)')
# Faulting address in a "...-abort at address 0x..." line.
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One entry of the memory region table. The trailing "[<n>]" (index into the
# ELF list) is optional. All patterns are raw strings so that '\[' and '\-'
# reach the regex engine untouched instead of being invalid string escapes.
REGION_RE = re.compile(r'region [0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{6} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One entry of the ELF list: "[<idx>] <uuid> @ <load address>".
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f\-]+)')

# Text shown after the option list in the --help output.
epilog = '''
This script reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D
'''
55
56
def get_args():
    """Parse the command line and return the resulting argparse namespace.

    Two options are defined:
      -d/--dir         may be repeated and takes one or more values each
                       time, so the parsed value is a list of lists;
      -s/--strip_path  takes an optional value and defaults to the current
                       working directory.
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed '
                'to decode a TEE Core or pseudo-TA abort, while '
                '<TA_uuid>.elf is required if a user-mode TA has '
                'crashed. For convenience, ELF files may also be '
                'given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: '
                  'current directory, use -s with no argument to show '
                  'full paths)')

    # The raw formatter keeps the line breaks of the epilog as written
    parser = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-d', '--dir', nargs='+', action='append',
                        help=dir_help)
    parser.add_argument('-s', '--strip_path', nargs='?',
                        default=os.getcwd(), help=strip_help)

    parsed = parser.parse_args()
    return parsed
74
75
class Symbolizer(object):
    """Line filter that adds symbol information to OP-TEE abort dumps.

    Lines are fed one at a time to write(). Region table and ELF list lines
    are held back until the 'Call stack:' marker is seen; they are then
    emitted with extra annotations, and each following call stack address
    gets the addr2line output appended. Everything else is passed through
    to the output stream unchanged.

    Parameters:
        out         file-like object that receives the annotated dump
        dirs        list of directories (or ELF file paths) searched for
                    tee.elf and <uuid>.elf
        strip_path  prefix removed from the paths printed by addr2line; a
                    false value disables stripping
    """

    def __init__(self, out, dirs, strip_path):
        self._out = out
        self._dirs = dirs
        self._strip_path = strip_path
        # Must exist before reset() runs, since reset() inspects it
        self._addr2line = None
        self.reset()

    @staticmethod
    def _reap(proc):
        """Terminate a child process, close its pipes and wait for it."""
        try:
            proc.terminate()
        except OSError:
            # The process is already gone
            pass
        for pipe in (proc.stdin, proc.stdout):
            if pipe:
                pipe.close()
        proc.wait()

    def my_Popen(self, cmd):
        """Start cmd (an argument list) with stdin and stdout piped.

        The pipes are opened in text mode so that the same string handling
        works with Python 2 and Python 3. If the executable does not exist,
        an error is printed to stderr and the script exits with status 1.
        Any other OSError is propagated to the caller.
        """
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError as e:
            if e.errno == errno.ENOENT:
                sys.stderr.write('*** Error: ' + cmd[0] +
                                 ': command not found\n')
                sys.exit(1)
            # Do not hide other failures behind a None return value
            raise

    def get_elf(self, elf_or_uuid):
        """Return the path of the ELF file for a name or UUID, or None.

        '.elf' is appended to the argument when missing. Each entry of the
        search list is tried in order, first as a path to the file itself,
        then as a directory containing it.
        """
        if not elf_or_uuid.endswith('.elf'):
            elf_or_uuid += '.elf'
        for d in self._dirs:
            if d.endswith(elf_or_uuid) and os.path.isfile(d):
                return d
            elf = glob.glob(d + '/' + elf_or_uuid)
            if elf:
                return elf[0]
        return None

    def set_arch(self):
        """Determine the toolchain prefix and cache it in self._arch.

        The CROSS_COMPILE environment variable is used when set. Otherwise
        the 'file' utility is run on the first ELF of the list and the
        prefix is derived from the architecture it reports.
        """
        if self._arch:
            return
        self._arch = os.getenv('CROSS_COMPILE')
        if self._arch:
            return
        elf = self.get_elf(self._elfs[0][0])
        if elf is None:
            return
        p = self.my_Popen(['file', elf])
        # communicate() reads all the output and waits for the process
        lines = p.communicate()[0].splitlines()
        first_line = lines[0] if lines else ''
        if 'ARM aarch64,' in first_line:
            self._arch = 'aarch64-linux-gnu-'
        elif 'ARM,' in first_line:
            self._arch = 'arm-linux-gnueabihf-'

    def arch_prefix(self, cmd):
        """Return cmd with the toolchain prefix, or '' if none is known."""
        self.set_arch()
        if self._arch is None:
            return ''
        return self._arch + cmd

    def spawn_addr2line(self, elf_name):
        """Make sure the running addr2line process serves elf_name.

        A process already serving elf_name is reused. Otherwise the current
        process (if any) is stopped and a new one is started, provided that
        the ELF file and the toolchain can be found. On failure
        self._addr2line is left as None.
        """
        if elf_name is None:
            return
        # Names are compared by value: two equal strings need not be the
        # same object
        if self._addr2line_elf_name == elf_name:
            return
        if self._addr2line:
            self._reap(self._addr2line)
            self._addr2line = None
            # Forget the name too, or a later request for the old ELF would
            # be treated as already satisfied
            self._addr2line_elf_name = None
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('addr2line')
        if not cmd:
            return
        self._addr2line = self.my_Popen([cmd, '-f', '-p', '-e', elf])
        self._addr2line_elf_name = elf_name

    # If addr falls into a region that maps a TA ELF file, return the load
    # address of that file.
    def elf_load_addr(self, addr):
        """Return the load address (hex string) of the ELF containing addr.

        Without a region table the address is assumed to belong to tee.elf
        and '0x0' is returned. With a region table, None is returned when
        no region with an ELF index contains addr.
        """
        if not self._regions:
            # tee.elf
            return '0x0'
        i_addr = int(addr, 16)
        for r in self._regions:
            r_addr = int(r[0], 16)
            r_size = int(r[1], 16)
            if r_addr <= i_addr < r_addr + r_size:
                # Found region
                elf_idx = r[2]
                if elf_idx is not None:
                    entry = self._elfs.get(int(elf_idx))
                    if entry is not None:
                        return entry[1]
        return None

    def elf_for_addr(self, addr):
        """Return the name of the ELF containing addr.

        The result is 'tee.elf', the UUID of a TA taken from the ELF list,
        or None when the address cannot be attributed to any ELF.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if l_addr == '0x0':
            return 'tee.elf'
        i_l_addr = int(l_addr, 16)
        for k in sorted(self._elfs):
            e = self._elfs[k]
            if int(e[1], 16) == i_l_addr:
                return e[0]
        return None

    def subtract_load_addr(self, addr):
        """Convert addr to an offset relative to the load address.

        Returns a hex string, None when the load address is unknown, or ''
        when the load address is greater than addr.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if int(l_addr, 16) > int(addr, 16):
            return ''
        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))

    def resolve(self, addr):
        """Return the addr2line output for addr.

        '???' is returned when the address cannot be translated, '!!!' when
        talking to the addr2line process fails.
        """
        reladdr = self.subtract_load_addr(addr)
        self.spawn_addr2line(self.elf_for_addr(addr))
        if not reladdr or not self._addr2line:
            return '???'
        try:
            self._addr2line.stdin.write(reladdr + '\n')
            # The pipe may be buffered: push the request out before waiting
            # for the answer
            self._addr2line.stdin.flush()
            ret = self._addr2line.stdout.readline().rstrip('\n')
        except IOError:
            ret = '!!!'
        return ret

    def symbol_plus_offset(self, addr):
        """Return '<symbol>' or '<symbol>+<offset>' for addr, or ''.

        The symbol table is read with 'nm --numeric-sort --print-size'. The
        offset is printed in decimal. '' is returned when no symbol is
        found or when the ELF file or the toolchain is missing.
        """
        ret = ''
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('nm')
        if not reladdr or not elf or not cmd:
            return ''
        ireladdr = int(reladdr, 16)
        nm = self.my_Popen([cmd, '--numeric-sort', '--print-size', elf])
        # Previous symbol seen; needed for symbols that have no size
        prev_name = None
        prev_addr = 0
        prev_size = 0
        try:
            for line in iter(nm.stdout.readline, ''):
                fields = line.split()
                if len(fields) == 4:
                    sym_addr, sym_size, _, name = fields
                elif len(fields) == 3:
                    # Size is missing
                    sym_addr, _, name = fields
                    sym_size = '0'
                else:
                    # E.g., undefined (external) symbols (line = "U symbol")
                    continue
                try:
                    iaddr = int(sym_addr, 16)
                    isize = int(sym_size, 16)
                except ValueError:
                    # Not a symbol line
                    continue
                if iaddr == ireladdr:
                    ret = name
                    break
                if iaddr < ireladdr and iaddr + isize >= ireladdr:
                    ret = name + '+' + str(ireladdr - iaddr)
                    break
                if iaddr > ireladdr:
                    # Symbols are sorted by address, so nothing further can
                    # match. If the previous symbol has no size, assume it
                    # extends up to this one.
                    if prev_name is not None and prev_size == 0:
                        ret = prev_name + '+' + str(ireladdr - prev_addr)
                    break
                prev_name = name
                prev_addr = iaddr
                prev_size = isize
        finally:
            self._reap(nm)
        return ret

    def section_plus_offset(self, addr):
        """Return '<section>' or '<section>+<offset>' for addr, or ''.

        Section addresses and sizes are read from the output of
        'objdump --section-headers'. The offset is printed in decimal.
        """
        ret = ''
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not reladdr or not elf or not cmd:
            return ''
        iaddr = int(reladdr, 16)
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            for line in iter(objdump.stdout.readline, ''):
                try:
                    _, name, size, vma, _, _, _ = line.split()
                    ivma = int(vma, 16)
                    isize = int(size, 16)
                except ValueError:
                    # Not a section description line
                    continue
                if ivma == iaddr:
                    ret = name
                    break
                if ivma < iaddr and ivma + isize >= iaddr:
                    ret = name + '+' + str(iaddr - ivma)
                    break
        finally:
            self._reap(objdump)
        return ret

    def process_abort(self, line):
        """Return the abort line with symbol/section info after the address.

        line must match ABORT_ADDR_RE. '' is returned when neither a symbol
        nor a section could be found for the abort address.
        """
        ret = ''
        match = re.search(ABORT_ADDR_RE, line)
        addr = match.group('addr')
        pre = match.start('addr')
        post = match.end('addr')
        sym = self.symbol_plus_offset(addr)
        sec = self.section_plus_offset(addr)
        if sym or sec:
            ret += line[:pre]
            ret += addr
            if sym:
                ret += ' ' + sym
            if sec:
                ret += ' ' + sec
            ret += line[post:]
        return ret

    # Return all ELF sections with the ALLOC flag
    def read_sections(self, elf_name):
        """Fill self._sections[elf_name] with the ALLOC sections of the ELF.

        Each entry is [name, vma, size]. Nothing is done when the sections
        have been read already, or when the ELF file or the toolchain
        cannot be found.
        """
        if elf_name is None:
            return
        if elf_name in self._sections:
            return
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not elf or not cmd:
            return
        sections = []
        self._sections[elf_name] = sections
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        # (name, vma, size) from the last seven-field line seen
        last = None
        try:
            for line in iter(objdump.stdout.readline, ''):
                fields = line.split()
                if len(fields) == 7:
                    # Seven-field line: remember it. The flags are looked
                    # for on the lines that follow.
                    last = (fields[1], fields[3], fields[2])
                elif 'ALLOC' in line and last is not None:
                    name, vma, size = last
                    sections.append([name, int(vma, 16), int(size, 16)])
        finally:
            self._reap(objdump)

    def overlaps(self, section, addr, size):
        """Tell if a section intersects the range [addr, addr + size).

        section is a [name, addr, size] list. Empty sections and empty
        ranges never overlap anything.
        """
        sec_addr = section[1]
        sec_size = section[2]
        if not size or not sec_size:
            return False
        return ((addr <= (sec_addr + sec_size - 1)) and
                ((addr + size - 1) >= sec_addr))

    def sections_in_region(self, addr, size, elf_idx):
        """Return the names of the ELF sections found in a memory region.

        addr and size are hex strings taken from the region table, elf_idx
        is the index into the ELF list. Each name is preceded by a space;
        '' is returned when nothing can be determined.
        """
        ret = ''
        addr = self.subtract_load_addr(addr)
        if not addr:
            return ''
        iaddr = int(addr, 16)
        isize = int(size, 16)
        entry = self._elfs.get(int(elf_idx))
        if entry is None or entry[0] is None:
            return ''
        elf = entry[0]
        self.read_sections(elf)
        if elf not in self._sections:
            return ''
        for s in self._sections[elf]:
            if self.overlaps(s, iaddr, isize):
                ret += ' ' + s[0]
        return ret

    def reset(self):
        """Forget everything learnt from the dump processed so far."""
        self._call_stack_found = False
        if self._addr2line:
            self._reap(self._addr2line)
            self._addr2line = None
        self._addr2line_elf_name = None
        self._arch = None
        self._saved_abort_line = ''
        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
        # {idx: [uuid, load_addr], ...}. Load addresses are hex strings,
        # like the ones taken from the ELF list.
        self._elfs = {0: ["tee.elf", "0x0"]}

    def pretty_print_path(self, path):
        """Remove the strip path (and following slashes) from path."""
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path

    def _write_saved_tables(self):
        """Emit the saved region table, ELF list and abort line."""
        for r in self._regions:
            r_addr, r_size, elf_idx, saved_line = r[0], r[1], r[2], r[3]
            if elf_idx is None:
                self._out.write(saved_line)
            else:
                self._out.write(saved_line.strip() +
                                self.sections_in_region(r_addr, r_size,
                                                        elf_idx) +
                                '\n')
        for k in sorted(self._elfs):
            e = self._elfs[k]
            if len(e) >= 3:
                # TA executable or library
                self._out.write(e[2].strip())
                elf = self.get_elf(e[0])
                if elf:
                    rpath = os.path.realpath(elf)
                    path = self.pretty_print_path(rpath)
                    self._out.write(' (' + path + ')')
                self._out.write('\n')
        # Here is a good place to resolve the abort address because we
        # have all the information we need
        if self._saved_abort_line:
            self._out.write(self.process_abort(self._saved_abort_line))

    def write(self, line):
        """Process one line of the dump and write the result to the output.

        Region table and ELF list lines are not written immediately; they
        are saved and emitted when the 'Call stack:' line is reached.
        """
        if self._call_stack_found:
            match = re.search(STACK_ADDR_RE, line)
            if match:
                addr = match.group('addr')
                res = self.pretty_print_path(self.resolve(addr))
                self._out.write(line[:match.start('addr')])
                self._out.write(addr)
                self._out.write(' ' + res)
                self._out.write(line[match.end('addr'):])
                return
            # End of the call stack
            self.reset()
        match = re.search(REGION_RE, line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            self._regions.append([match.group('addr'), match.group('size'),
                                  match.group('elf_idx'), line])
            return
        match = re.search(ELF_LIST_RE, line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            i = int(match.group('idx'))
            self._elfs[i] = [match.group('uuid'), match.group('load_addr'),
                             line]
            return
        if re.search(CALL_STACK_RE, line):
            self._call_stack_found = True
            self._write_saved_tables()
        if re.search(ABORT_ADDR_RE, line):
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so we can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)

    def flush(self):
        """Flush the output stream."""
        self._out.flush()
423
424
def main():
    """Read a dump from stdin and write the symbolized version to stdout."""
    args = get_args()
    # Flatten list in case -d is used several times *and* with multiple
    # arguments
    search_paths = []
    for group in args.dir or []:
        search_paths.extend(group)
    args.dirs = search_paths

    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)
    for line in sys.stdin:
        symbolizer.write(line)
    symbolizer.flush()


if __name__ == "__main__":
    main()
442