xref: /optee_os/scripts/symbolize.py (revision 095567e5e50cbd8918b4e9b181f70d08be7e8a1b)
1#!/usr/bin/env python
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import glob
10import os
11import re
12import subprocess
13import sys
14
# Patterns used to recognise the interesting lines of an OP-TEE dump.
# All of them are raw strings so that regex escapes such as \[ are not
# interpreted (and warned about) as Python string escapes.

# Marks the beginning of a call stack dump.
CALL_STACK_RE = re.compile(r'Call stack:')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(r'[UEIDFM]/T[AC]:.*(?P<addr>0x[0-9a-f]+)')
# Faulting address of a '...-abort at address 0x...' line.
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One entry of the region table; the trailing '[<elf_idx>]' is optional.
REGION_RE = re.compile(r'region [0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{6} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One entry of the ELF list: '[<idx>] <uuid> @ <load address>'.
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f\-]+)')
25
# Long help text passed to argparse as the parser epilog (see get_args()).
# It is printed verbatim, so the layout below is what the user sees.
epilog = '''
This script reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D
'''
55
56
def get_args():
    """Parse the command line and return the resulting namespace.

    Two options are defined:

    -d/--dir         may be repeated, each occurrence taking one or more
                     paths; the parsed value is therefore a list of lists,
                     or None when the option is not used.
    -s/--strip_path  optional value; defaults to the current working
                     directory and becomes None when given without a value.
    """
    dir_help = (
        'Search for ELF file in DIR. tee.elf is needed to decode '
        'a TEE Core or pseudo-TA abort, while <TA_uuid>.elf is required '
        'if a user-mode TA has crashed. For convenience, ELF files '
        'may also be given.')
    strip_help = (
        'Strip STRIP_PATH from file paths (default: current directory, '
        'use -s with no argument to show full paths)')

    # RawDescriptionHelpFormatter keeps the hand-formatted epilog intact.
    arg_parser = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('-d', '--dir', nargs='+', action='append',
                            help=dir_help)
    arg_parser.add_argument('-s', '--strip_path', nargs='?',
                            default=os.getcwd(), help=strip_help)

    parsed = arg_parser.parse_args()
    return parsed
73
74
class Symbolizer(object):
    """Stream filter that adds symbol information to OP-TEE dumps.

    The dump is fed line by line to write(). Region table and ELF list
    lines are held back until the 'Call stack:' line is seen; they are then
    written out with extra annotations, and every following call stack
    address is resolved with addr2line. ELF files are looked up in the
    directories (or explicit ELF paths) given to the constructor.

    Addresses are handled as hexadecimal strings ('0x...') throughout, the
    way they appear in the dump.
    """

    def __init__(self, out, dirs, strip_path):
        """Create a symbolizer.

        out        -- object with write() and flush() receiving the output
        dirs       -- list of directories and/or ELF file paths to search
        strip_path -- prefix removed from reported paths (falsy: keep as is)
        """
        self._out = out
        self._dirs = dirs
        self._strip_path = strip_path
        # Must exist before reset(), which stops any running addr2line
        self._addr2line = None
        self.reset()

    def get_elf(self, elf_or_uuid):
        """Return the path of the ELF file for a name or UUID, or None.

        '.elf' is appended when missing. Each search entry is tried first
        as the ELF file itself, then as a directory containing it.
        """
        if not elf_or_uuid.endswith('.elf'):
            elf_or_uuid += '.elf'
        for d in self._dirs:
            if d.endswith(elf_or_uuid) and os.path.isfile(d):
                return d
            found = glob.glob(os.path.join(d, elf_or_uuid))
            if found:
                return found[0]
        return None

    def _run_tool(self, argv):
        """Run argv to completion and return its stdout as a list of lines.

        The output is read in text mode so that it can be compared with
        str literals on both Python 2 and Python 3. communicate() also
        reaps the child process.
        """
        proc = subprocess.Popen(argv,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        output, _ = proc.communicate()
        return output.splitlines()

    @staticmethod
    def _parse_section_header(line):
        """Parse one 'objdump --section-headers' line.

        Returns (name, vma, size) with vma and size as integers when the
        line has the seven fields 'idx name size vma lma offs algn' and
        starts with a numeric index, None for any other line.
        """
        fields = line.split()
        if len(fields) != 7 or not fields[0].isdigit():
            return None
        try:
            return (fields[1], int(fields[3], 16), int(fields[2], 16))
        except ValueError:
            return None

    def set_arch(self):
        """Select the toolchain prefix from what 'file' reports.

        The ELF registered at index 0 is inspected. Nothing happens when a
        prefix is already set, when that ELF cannot be found or when the
        output of 'file' names neither ARM variant.
        """
        if self._arch:
            return
        elf = self.get_elf(self._elfs[0][0])
        if elf is None:
            return
        output = self._run_tool(['file', elf])
        if not output:
            return
        if 'ARM aarch64,' in output[0]:
            self._arch = 'aarch64-linux-gnu-'
        elif 'ARM,' in output[0]:
            self._arch = 'arm-linux-gnueabihf-'

    def arch_prefix(self, cmd):
        """Return cmd with the toolchain prefix, or '' if it is unknown."""
        self.set_arch()
        if self._arch is None:
            return ''
        return self._arch + cmd

    def _stop_addr2line(self):
        """Terminate and reap the addr2line child process, if any."""
        if self._addr2line:
            try:
                self._addr2line.terminate()
            except OSError:
                # The process is already gone
                pass
            self._addr2line.wait()
            self._addr2line = None
        self._addr2line_elf_name = None

    def spawn_addr2line(self, elf_name):
        """Make sure the running addr2line process serves elf_name.

        A process already started for the same name is reused. Otherwise
        the previous one is stopped and a new one is started, provided the
        ELF file and the toolchain prefix can both be determined.
        """
        if elf_name is None:
            return
        if self._addr2line_elf_name == elf_name:
            return
        self._stop_addr2line()
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('addr2line')
        if not cmd:
            return
        self._addr2line = subprocess.Popen([cmd, '-f', '-p', '-e', elf],
                                           stdin=subprocess.PIPE,
                                           stdout=subprocess.PIPE,
                                           universal_newlines=True)
        self._addr2line_elf_name = elf_name

    # If addr falls into a region that maps a TA ELF file, return the load
    # address of that file.
    def elf_load_addr(self, addr):
        """Return the load address of the ELF mapped at addr.

        Without a region table the address is taken to belong to tee.elf
        and '0x0' is returned. With a region table, None is returned when
        no region containing addr refers to a known ELF index.
        """
        if not self._regions:
            # tee.elf
            return '0x0'
        i_addr = int(addr, 16)
        for r in self._regions:
            r_addr = int(r[0], 16)
            r_size = int(r[1], 16)
            if r_addr <= i_addr < r_addr + r_size:
                # Found region
                elf_idx = r[2]
                if elf_idx is not None:
                    entry = self._elfs.get(int(elf_idx))
                    if entry is not None:
                        return entry[1]
        return None

    def elf_for_addr(self, addr):
        """Return the name (or UUID) of the ELF mapped at addr, or None."""
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if l_addr == '0x0':
            return 'tee.elf'
        i_l_addr = int(l_addr, 16)
        for k in self._elfs:
            e = self._elfs[k]
            if int(e[1], 16) == i_l_addr:
                return e[0]
        return None

    def subtract_load_addr(self, addr):
        """Return addr relative to the load address of its ELF.

        The result is a hex string, None when the ELF is unknown and ''
        when the load address lies above addr.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        i_addr = int(addr, 16)
        i_l_addr = int(l_addr, 16)
        if i_l_addr > i_addr:
            return ''
        return '0x{:x}'.format(i_addr - i_l_addr)

    def resolve(self, addr):
        """Return the addr2line output for addr.

        '???' is returned when the address cannot be looked up and '!!!'
        when talking to addr2line fails.
        """
        reladdr = self.subtract_load_addr(addr)
        self.spawn_addr2line(self.elf_for_addr(addr))
        if not reladdr or not self._addr2line:
            return '???'
        try:
            self._addr2line.stdin.write(reladdr + '\n')
            # The pipe may be buffered: push the request out before
            # waiting for the answer
            self._addr2line.stdin.flush()
            ret = self._addr2line.stdout.readline().rstrip('\n')
        except IOError:
            ret = '!!!'
        return ret

    def symbol_plus_offset(self, addr):
        """Return '<symbol>' or '<symbol>+<offset>' for addr using nm.

        The offset is printed in decimal. An empty string is returned when
        the ELF, the toolchain or a suitable symbol cannot be found.
        """
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('nm')
        if not reladdr or not elf or not cmd:
            return ''
        ireladdr = int(reladdr, 16)
        prev = None  # (address, size, name) of the preceding symbol
        for line in self._run_tool([cmd, '--numeric-sort', '--print-size',
                                    elf]):
            fields = line.split()
            if len(fields) == 4:
                sym_addr, size, _, name = fields
            elif len(fields) == 3:
                # Size is missing
                sym_addr, _, name = fields
                size = '0'
            else:
                # E.g., undefined symbols, which have no address
                continue
            try:
                iaddr = int(sym_addr, 16)
                isize = int(size, 16)
            except ValueError:
                continue
            if iaddr == ireladdr:
                return name
            if iaddr < ireladdr and iaddr + isize >= ireladdr:
                return name + '+' + str(ireladdr - iaddr)
            if iaddr > ireladdr:
                # Symbols are sorted by address so nothing further down
                # can match. If the preceding symbol has no size, assume
                # that it extends up to here.
                if prev is not None and prev[1] == 0:
                    return prev[2] + '+' + str(ireladdr - prev[0])
                return ''
            prev = (iaddr, isize, name)
        return ''

    def section_plus_offset(self, addr):
        """Return '<section>' or '<section>+<offset>' for addr using objdump.

        The offset is printed in decimal. An empty string is returned when
        the ELF, the toolchain or a matching section cannot be found.
        """
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not reladdr or not elf or not cmd:
            return ''
        iaddr = int(reladdr, 16)
        for line in self._run_tool([cmd, '--section-headers', elf]):
            header = self._parse_section_header(line)
            if header is None:
                continue
            name, ivma, isize = header
            if ivma == iaddr:
                return name
            if ivma < iaddr and ivma + isize >= iaddr:
                return name + '+' + str(iaddr - ivma)
        return ''

    def process_abort(self, line):
        """Return the abort line with symbol and section added.

        The symbol and/or section are inserted right after the abort
        address. An empty string is returned when neither could be found.
        """
        match = ABORT_ADDR_RE.search(line)
        if not match:
            return ''
        addr = match.group('addr')
        sym = self.symbol_plus_offset(addr)
        sec = self.section_plus_offset(addr)
        if not sym and not sec:
            return ''
        ret = line[:match.start('addr')] + addr
        if sym:
            ret += ' ' + sym
        if sec:
            ret += ' ' + sec
        return ret + line[match.end('addr'):]

    # Return all ELF sections with the ALLOC flag
    def read_sections(self, elf_name):
        """Cache the ALLOC sections of elf_name in self._sections.

        objdump prints each section on two lines: the header with name,
        size and addresses, then the flags. A section is recorded as
        [name, vma, size] when its flags line contains ALLOC.
        """
        if elf_name is None:
            return
        if elf_name in self._sections:
            return
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not elf or not cmd:
            return
        sections = []
        self._sections[elf_name] = sections
        current = None  # most recent section header seen
        for line in self._run_tool([cmd, '--section-headers', elf]):
            header = self._parse_section_header(line)
            if header is not None:
                current = header
            elif current is not None and 'ALLOC' in line:
                sections.append(list(current))

    def overlaps(self, section, addr, size):
        """Tell whether [addr, addr + size) intersects section.

        section is a [name, addr, size] list. Empty ranges never overlap.
        """
        sec_addr = section[1]
        sec_size = section[2]
        if not size or not sec_size:
            return False
        return ((addr <= (sec_addr + sec_size - 1)) and
                ((addr + size - 1) >= sec_addr))

    def sections_in_region(self, addr, size, elf_idx):
        """Return the names of the ELF sections found in a region.

        addr and size are the hex strings of the region table and elf_idx
        is the index of the ELF mapped there. Each name is preceded by a
        space; the result is '' when nothing can be determined.
        """
        reladdr = self.subtract_load_addr(addr)
        if not reladdr:
            return ''
        iaddr = int(reladdr, 16)
        isize = int(size, 16)
        entry = self._elfs.get(int(elf_idx))
        if entry is None or entry[0] is None:
            return ''
        elf = entry[0]
        self.read_sections(elf)
        if elf not in self._sections:
            return ''
        return ''.join(' ' + s[0] for s in self._sections[elf]
                       if self.overlaps(s, iaddr, isize))

    def reset(self):
        """Forget everything learnt from the dump processed so far."""
        self._call_stack_found = False
        self._stop_addr2line()
        self._arch = None
        self._saved_abort_line = ''
        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
        # Load addresses are hex strings, like those read from the dump
        self._elfs = {0: ["tee.elf", "0x0"]}  # {idx: [uuid, load_addr], ...}

    def pretty_print_path(self, path):
        """Return path with the strip path (and following '/') removed."""
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path

    def _write_saved_tables(self):
        """Write the saved region table and ELF list with annotations."""
        for r_addr, r_size, elf_idx, saved_line in self._regions:
            if elf_idx is None:
                self._out.write(saved_line)
            else:
                self._out.write(saved_line.strip() +
                                self.sections_in_region(r_addr, r_size,
                                                        elf_idx) +
                                '\n')
        for k in sorted(self._elfs):
            e = self._elfs[k]
            # The default tee.elf entry has no saved line
            if len(e) >= 3:
                self._out.write(e[2].strip())
            elf = self.get_elf(e[0])
            if elf:
                rpath = os.path.realpath(elf)
                self._out.write(' (' + self.pretty_print_path(rpath) + ')')
            self._out.write('\n')

    def write(self, line):
        """Process one line of the dump and write the result to the output.

        Call stack addresses get the addr2line result appended. Region
        table and ELF list lines are saved and written later, when the
        call stack starts. Any other line is copied unchanged.
        """
        if self._call_stack_found:
            match = STACK_ADDR_RE.search(line)
            if match:
                addr = match.group('addr')
                res = self.pretty_print_path(self.resolve(addr))
                self._out.write(line[:match.start('addr')])
                self._out.write(addr)
                self._out.write(' ' + res)
                self._out.write(line[match.end('addr'):])
                return
            # End of the call stack
            self.reset()
        match = REGION_RE.search(line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            self._regions.append([match.group('addr'), match.group('size'),
                                  match.group('elf_idx'), line])
            return
        match = ELF_LIST_RE.search(line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            idx = int(match.group('idx'))
            self._elfs[idx] = [match.group('uuid'),
                               match.group('load_addr'), line]
            return
        if CALL_STACK_RE.search(line):
            self._call_stack_found = True
            self._write_saved_tables()
            # Here is a good place to resolve the abort address because we
            # have all the information we need
            if self._saved_abort_line:
                self._out.write(self.process_abort(self._saved_abort_line))
        if ABORT_ADDR_RE.search(line):
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so we can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)

    def flush(self):
        """Flush the output object."""
        self._out.flush()
414
415
def main():
    """Symbolize the dump read from stdin and write it to stdout."""
    args = get_args()
    # -d may be used several times *and* with several values each time, so
    # the parsed value is a list of lists (None when -d is not used):
    # merge everything into one flat list of search paths.
    search_paths = []
    for group in args.dir or []:
        search_paths.extend(group)
    args.dirs = search_paths

    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)
    for line in sys.stdin:
        symbolizer.write(line)
    symbolizer.flush()


if __name__ == "__main__":
    main()
432