xref: /optee_os/scripts/symbolize.py (revision 439203cb04cee677a6cd8bf9cb02b82d626713cc)
1#!/usr/bin/env python
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import glob
10import os
11import re
12import subprocess
13import sys
14
# Patterns used to pick the interesting lines out of an OP-TEE dump. They are
# all raw strings so that regex escapes such as \[ are never interpreted as
# (invalid) Python string escapes.

# Marks the beginning of a call stack
CALL_STACK_RE = re.compile(r'Call stack:')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(r'[UEIDFM]/T[AC]:.*(?P<addr>0x[0-9a-f]+)')
# Faulting address of an abort message ("... data-abort at address 0x...")
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One entry of the region table. The "flags" part and the "[<ELF index>]"
# suffix are both optional.
REGION_RE = re.compile(r'region [0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{6} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One entry of the ELF list: "[<index>] <uuid> @ <load address>". The load
# address is plain hexadecimal: a '-' is not accepted inside it, otherwise the
# captured text could not be converted with int(x, 16).
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f]+)')
25
# Long help text passed to argparse as the parser epilog (see get_args()). The
# parser is created with RawDescriptionHelpFormatter, so this text is shown
# as written here, including the example dump and its indentation.
# NOTE(review): "This scripts reads" in the first sentence looks like a typo
# for "This script reads"; the string is runtime text and is left untouched.
epilog = '''
This scripts reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D
'''
55
56
def get_args():
    """Parse the command line and return the resulting argparse namespace.

    Two options are defined:
      -d/--dir         may be repeated and takes one or more values each
                       time, so the parsed value is a list of lists
      -s/--strip_path  optional value; defaults to the current directory
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed '
                'to decode a TEE Core or pseudo-TA abort, while '
                '<TA_uuid>.elf is required if a user-mode TA has '
                'crashed. For convenience, ELF files may also be '
                'given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: '
                  'current directory, use -s with no argument to show '
                  'full paths)')

    # The raw formatter keeps the epilog layout exactly as written
    cli = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument('-d', '--dir', nargs='+', action='append',
                     help=dir_help)
    cli.add_argument('-s', '--strip_path', nargs='?', default=os.getcwd(),
                     help=strip_help)
    return cli.parse_args()
74
75
class Symbolizer(object):
    """Line-oriented filter that adds debug information to OP-TEE dumps.

    Lines are passed one at a time to write(). Region table and ELF list
    lines are held back, abort lines are remembered, and once the
    'Call stack:' marker is seen the saved information is printed (annotated
    with section names and ELF paths) followed by the call stack, where each
    address gets the output of addr2line appended.

    out        -- file-like object receiving the annotated text
    dirs       -- list of directories (or ELF file paths) searched for ELFs
    strip_path -- prefix removed from the paths that are printed, or a false
                  value to print paths unchanged
    """

    def __init__(self, out, dirs, strip_path):
        self._out = out
        self._dirs = dirs
        self._strip_path = strip_path
        # Must exist before reset(), which terminates a running addr2line
        self._addr2line = None
        self.reset()

    @staticmethod
    def _reap(proc):
        """Terminate a child process, then wait for it to be collected."""
        try:
            proc.terminate()
        except OSError:
            # The process is already gone; wait() below still collects it
            pass
        proc.wait()

    def my_Popen(self, cmd):
        """Start cmd (an argument list) with stdin and stdout piped.

        The pipes are opened in text mode so that lines read from the child
        are strings and the '' end-of-file sentinel used by the readers in
        this class works.

        If the executable cannot be found, an error is printed on stderr and
        the script exits with status 1. Any other OSError is re-raised
        instead of being silently turned into a None return value.
        """
        # Imported here because errno is not among the imports at the top of
        # this file (os.errno is not a documented attribute of os).
        import errno
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError as e:
            if e.errno == errno.ENOENT:
                sys.stderr.write('*** Error: ' + cmd[0] +
                                 ': command not found\n')
                sys.exit(1)
            raise

    def get_elf(self, elf_or_uuid):
        """Return the path of the ELF file for elf_or_uuid, or None.

        '.elf' is appended to the name when missing. Each entry of the
        search list is accepted either if it is itself a file with exactly
        that name, or if it is a directory (glob pattern) containing it.
        """
        if not elf_or_uuid.endswith('.elf'):
            elf_or_uuid += '.elf'
        for d in self._dirs:
            # Compare the whole file name: a plain suffix test would accept
            # e.g. 'xtee.elf' when looking for 'tee.elf'
            if os.path.basename(d) == elf_or_uuid and os.path.isfile(d):
                return d
            found = glob.glob(d + '/' + elf_or_uuid)
            if found:
                return found[0]
        return None

    def set_arch(self):
        """Determine the toolchain prefix from the first entry of the ELF list.

        The 'file' utility is run on that ELF and the first line of its
        output selects 'aarch64-linux-gnu-' or 'arm-linux-gnueabihf-'.
        Nothing happens if the prefix is already known, if the ELF cannot be
        found or if the output is not recognized.
        """
        if self._arch:
            return
        elf = self.get_elf(self._elfs[0][0])
        if elf is None:
            return
        p = self.my_Popen(['file', elf])
        # communicate() reads everything and waits for the process to exit
        lines = p.communicate()[0].splitlines()
        if not lines:
            return
        if 'ARM aarch64,' in lines[0]:
            self._arch = 'aarch64-linux-gnu-'
        elif 'ARM,' in lines[0]:
            self._arch = 'arm-linux-gnueabihf-'

    def arch_prefix(self, cmd):
        """Return cmd with the toolchain prefix, or '' if it is unknown."""
        self.set_arch()
        if self._arch is None:
            return ''
        return self._arch + cmd

    def spawn_addr2line(self, elf_name):
        """Make sure the running addr2line process serves elf_name.

        A process already started for the same name is reused. Otherwise the
        previous one (if any) is stopped and a new one is started, provided
        the ELF file and the toolchain prefix can be found.
        """
        if elf_name is None:
            return
        # Names are compared by value: two equal strings need not be the
        # same object
        if self._addr2line_elf_name == elf_name:
            return
        if self._addr2line:
            self._reap(self._addr2line)
            self._addr2line = None
            self._addr2line_elf_name = None
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('addr2line')
        if not cmd:
            return
        self._addr2line = self.my_Popen([cmd, '-f', '-p', '-e', elf])
        self._addr2line_elf_name = elf_name

    # If addr falls into a region that maps a TA ELF file, return the load
    # address of that file.
    def elf_load_addr(self, addr):
        """Return the load address (hex string) of the ELF containing addr.

        Without a region table the address is assumed to belong to tee.elf
        and '0x0' is returned. With a region table, None is returned when no
        region with a known ELF index contains the address.
        """
        if not self._regions:
            # tee.elf
            return '0x0'
        i_addr = int(addr, 16)
        for r in self._regions:
            r_addr = int(r[0], 16)
            r_size = int(r[1], 16)
            if r_addr <= i_addr < r_addr + r_size:
                # Found region
                elf_idx = r[2]
                if elf_idx is not None:
                    entry = self._elfs.get(int(elf_idx))
                    if entry is not None:
                        return entry[1]
        return None

    def elf_for_addr(self, addr):
        """Return the name (or UUID) of the ELF containing addr, or None."""
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if l_addr == '0x0':
            return 'tee.elf'
        i_l_addr = int(l_addr, 16)
        for e in self._elfs.values():
            if int(e[1], 16) == i_l_addr:
                return e[0]
        return None

    def subtract_load_addr(self, addr):
        """Return addr relative to the load address of its ELF.

        The result is a hex string, None when the ELF is unknown, or '' when
        the load address is greater than addr.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if int(l_addr, 16) > int(addr, 16):
            return ''
        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))

    def resolve(self, addr):
        """Return the addr2line output for addr.

        '???' is returned when the address cannot be translated or no
        addr2line process is available, '!!!' when talking to it fails.
        """
        reladdr = self.subtract_load_addr(addr)
        self.spawn_addr2line(self.elf_for_addr(addr))
        if not reladdr or not self._addr2line:
            return '???'
        try:
            self._addr2line.stdin.write(reladdr + '\n')
            # The pipe may be buffered: without a flush the request could
            # stay in the buffer and readline() below would block forever
            self._addr2line.stdin.flush()
            ret = self._addr2line.stdout.readline().rstrip('\n')
        except IOError:
            ret = '!!!'
        return ret

    @staticmethod
    def _find_symbol(lines, target):
        """Find the symbol covering address target in sorted nm output.

        lines are the output lines of 'nm --numeric-sort --print-size'.
        Returns 'name' for an exact match, 'name+<decimal offset>' when the
        address is inside the symbol or follows a symbol without size, and
        '' when nothing matches.
        """
        prev_name = None
        prev_addr = 0
        prev_size = 0
        for line in lines:
            fields = line.split()
            if len(fields) == 4:
                addr, size, _, name = fields
            elif len(fields) == 3:
                # Size is missing
                addr, _, name = fields
                size = '0'
            else:
                # E.g., undefined (external) symbols (line = "U symbol")
                continue
            try:
                iaddr = int(addr, 16)
                isize = int(size, 16)
            except ValueError:
                continue
            if iaddr == target:
                return name
            # A symbol covers [iaddr, iaddr + isize): the address just past
            # its end belongs to whatever comes next
            if iaddr < target < iaddr + isize:
                return name + '+' + str(target - iaddr)
            if iaddr > target:
                # Input is sorted, so no later line can match. Fall back to
                # the previous symbol if its extent is unknown.
                if prev_name is not None and prev_size == 0:
                    return prev_name + '+' + str(target - prev_addr)
                return ''
            prev_name, prev_addr, prev_size = name, iaddr, isize
        return ''

    def symbol_plus_offset(self, addr):
        """Return '<symbol>[+<offset>]' for addr using nm, or ''."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('nm')
        if not reladdr or not elf or not cmd:
            return ''
        nm = self.my_Popen([cmd, '--numeric-sort', '--print-size', elf])
        try:
            return self._find_symbol(iter(nm.stdout.readline, ''),
                                     int(reladdr, 16))
        finally:
            self._reap(nm)

    @staticmethod
    def _parse_section_header(line):
        """Parse one line of 'objdump --section-headers'.

        Returns (name, vma, size) for a section line made of seven fields
        starting with the section index, or None for any other line (title,
        column header, flags).
        """
        fields = line.split()
        if len(fields) != 7 or not fields[0].isdigit():
            return None
        try:
            return fields[1], int(fields[3], 16), int(fields[2], 16)
        except ValueError:
            return None

    @staticmethod
    def _find_section(lines, target):
        """Find the section covering address target in objdump output.

        Returns 'name' when the section starts at target,
        'name+<decimal offset>' when target is inside it, '' otherwise.
        """
        for line in lines:
            header = Symbolizer._parse_section_header(line)
            if header is None:
                continue
            name, ivma, isize = header
            if ivma == target:
                return name
            if ivma < target < ivma + isize:
                return name + '+' + str(target - ivma)
        return ''

    def section_plus_offset(self, addr):
        """Return '<section>[+<offset>]' for addr using objdump, or ''."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not reladdr or not elf or not cmd:
            return ''
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            return self._find_section(iter(objdump.stdout.readline, ''),
                                      int(reladdr, 16))
        finally:
            self._reap(objdump)

    def process_abort(self, line):
        """Return the abort line with symbol and section info added.

        The information is inserted right after the faulting address. An
        empty string is returned when nothing could be found.
        """
        match = re.search(ABORT_ADDR_RE, line)
        if not match:
            return ''
        addr = match.group('addr')
        sym = self.symbol_plus_offset(addr)
        sec = self.section_plus_offset(addr)
        if not (sym or sec):
            return ''
        ret = line[:match.start('addr')] + addr
        if sym:
            ret += ' ' + sym
        if sec:
            ret += ' ' + sec
        return ret + line[match.end('addr'):]

    @staticmethod
    def _parse_alloc_sections(lines):
        """Return [[name, vma, size], ...] for sections flagged ALLOC.

        In the objdump output the flags are printed on the line following
        the section line, so the last section seen is remembered until its
        flags line shows up.
        """
        sections = []
        current = None
        for line in lines:
            header = Symbolizer._parse_section_header(line)
            if header is not None:
                current = header
                continue
            if current is not None and 'ALLOC' in line:
                sections.append(list(current))
                current = None
        return sections

    # Return all ELF sections with the ALLOC flag
    def read_sections(self, elf_name):
        """Fill the section cache for elf_name (done once per ELF)."""
        if elf_name is None:
            return
        if elf_name in self._sections:
            return
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not elf or not cmd:
            return
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            self._sections[elf_name] = self._parse_alloc_sections(
                                        iter(objdump.stdout.readline, ''))
        finally:
            self._reap(objdump)

    def overlaps(self, section, addr, size):
        """Tell if [addr, addr + size) intersects section.

        section is a [name, addr, size] list. Empty ranges never overlap.
        """
        sec_addr = section[1]
        sec_size = section[2]
        if not size or not sec_size:
            return False
        return ((addr <= (sec_addr + sec_size - 1)) and
                ((addr + size - 1) >= sec_addr))

    def sections_in_region(self, addr, size, elf_idx):
        """Return the names of the sections of ELF elf_idx found in a region.

        addr and size are hex strings describing the region. Each name is
        preceded by a space; '' is returned when nothing is known.
        """
        addr = self.subtract_load_addr(addr)
        if not addr:
            return ''
        iaddr = int(addr, 16)
        isize = int(size, 16)
        entry = self._elfs.get(int(elf_idx))
        if entry is None or entry[0] is None:
            return ''
        elf = entry[0]
        self.read_sections(elf)
        if elf not in self._sections:
            return ''
        return ''.join(' ' + s[0] for s in self._sections[elf]
                       if self.overlaps(s, iaddr, isize))

    def reset(self):
        """Forget everything learnt from the dump processed so far."""
        self._call_stack_found = False
        if self._addr2line:
            self._reap(self._addr2line)
            self._addr2line = None
        self._addr2line_elf_name = None
        self._arch = None
        self._saved_abort_line = ''
        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
        # {idx: [uuid, load_addr], ...}
        # Load addresses are hex strings, like the ones taken from the dump
        self._elfs = {0: ["tee.elf", '0x0']}

    def pretty_print_path(self, path):
        """Return path with the strip prefix (and following '/') removed."""
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path

    def _emit_saved_info(self):
        """Print the saved region table, ELF list and abort line."""
        for r_addr, r_size, elf_idx, saved_line in self._regions:
            if elf_idx is None:
                self._out.write(saved_line)
            else:
                self._out.write(saved_line.strip() +
                                self.sections_in_region(r_addr, r_size,
                                                        elf_idx) +
                                '\n')
        # Sorted so that the list is always printed in index order
        for k in sorted(self._elfs):
            e = self._elfs[k]
            if len(e) >= 3:
                # TA executable or library
                self._out.write(e[2].strip())
                elf = self.get_elf(e[0])
                if elf:
                    rpath = os.path.realpath(elf)
                    path = self.pretty_print_path(rpath)
                    self._out.write(' (' + path + ')')
                self._out.write('\n')
        # Here is a good place to resolve the abort address because we
        # have all the information we need
        if self._saved_abort_line:
            self._out.write(self.process_abort(self._saved_abort_line))

    def write(self, line):
        """Process one line of the dump and write the result to the output."""
        if self._call_stack_found:
            match = re.search(STACK_ADDR_RE, line)
            if match:
                addr = match.group('addr')
                pre = match.start('addr')
                post = match.end('addr')
                self._out.write(line[:pre])
                self._out.write(addr)
                res = self.resolve(addr)
                res = self.pretty_print_path(res)
                self._out.write(' ' + res)
                self._out.write(line[post:])
                return
            # First line that is not a stack entry: the dump is over
            self.reset()
        match = re.search(REGION_RE, line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            addr = match.group('addr')
            size = match.group('size')
            elf_idx = match.group('elf_idx')
            self._regions.append([addr, size, elf_idx, line])
            return
        match = re.search(ELF_LIST_RE, line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            i = int(match.group('idx'))
            self._elfs[i] = [match.group('uuid'), match.group('load_addr'),
                             line]
            return
        match = re.search(CALL_STACK_RE, line)
        if match:
            self._call_stack_found = True
            self._emit_saved_info()
        match = re.search(ABORT_ADDR_RE, line)
        if match:
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so we can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)

    def flush(self):
        """Flush the output stream."""
        self._out.flush()
420
421
def main():
    """Read a dump from stdin and write the symbolized version to stdout."""
    args = get_args()
    # -d may be given several times *and* with several values each time, so
    # args.dir is a list of lists (or None): merge it into one flat list
    args.dirs = []
    for group in args.dir or []:
        args.dirs.extend(group)

    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)
    for line in sys.stdin:
        symbolizer.write(line)
    symbolizer.flush()
435
436
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
439