xref: /optee_os/scripts/symbolize.py (revision 1d8c2a48d5238322f692f52ab67800a6374c1e6d)
1#!/usr/bin/env python
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import glob
10import os
11import re
12import subprocess
13import sys
14
# Patterns used to recognise the interesting lines of an OP-TEE dump.
# All of them are raw strings so that regex escapes such as '\[' reach the
# regex engine untouched instead of being (mis)read as string escapes.

# Marks the beginning of a call stack.
CALL_STACK_RE = re.compile(r'Call stack:')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(r'[UEIDFM]/T[AC]:.*(?P<addr>0x[0-9a-f]+)')
# Faulting address of a "...-abort at address 0x..." message.
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One entry of the region table; the trailing "[N]" ELF index is optional.
REGION_RE = re.compile(r'region [0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{6} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One entry of the ELF list: "[N] <uuid> @ <load address>".
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f\-]+)')
25
# Long help text shown after the option list by argparse (passed as the
# parser's 'epilog'). The layout is significant: the parser is created with
# RawDescriptionHelpFormatter so that this text is printed verbatim.
epilog = '''
This scripts reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D
'''
55
56
def get_args():
    """Build the command-line parser and return the parsed arguments.

    The returned namespace has two attributes:
      dir         list of lists of paths (one inner list per -d occurrence),
                  or None when -d was never given
      strip_path  prefix to remove from reported file paths; defaults to the
                  current directory and is None when -s is given without a
                  value
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed '
                'to decode a TEE Core or pseudo-TA abort, while '
                '<TA_uuid>.elf is required if a user-mode TA has '
                'crashed. For convenience, ELF files may also be '
                'given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: '
                  'current directory, use -s with no argument to show '
                  'full paths)')

    # The raw formatter keeps the hand-made layout of the epilog intact.
    cli = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument('-d', '--dir', nargs='+', action='append',
                     help=dir_help)
    cli.add_argument('-s', '--strip_path', nargs='?', default=os.getcwd(),
                     help=strip_help)
    return cli.parse_args()
74
75
class Symbolizer(object):
    """File-like filter that annotates OP-TEE abort/panic dumps.

    Lines are fed one at a time through write(). Region table and ELF list
    entries are buffered; once the 'Call stack:' marker is seen they are
    printed (with section names and ELF paths appended), the saved abort line
    is printed again with symbol/section information, and every following
    call stack address gets the output of addr2line appended.

    Addresses, sizes and load addresses are kept as hexadecimal strings
    (e.g. '0x40001000'), exactly as they appear in the dump.

    The code runs under both Python 2.7 and Python 3: all pipes to helper
    programs are opened in text mode (universal_newlines=True).
    """

    def __init__(self, out, dirs, strip_path):
        """Create a symbolizer.

        out         object with write() and flush() receiving the output
        dirs        list of directories (or ELF file paths) to search
        strip_path  path prefix removed from reported file names; a false
                    value keeps full paths
        """
        self._out = out
        self._dirs = dirs
        self._strip_path = strip_path
        self._addr2line = None
        self.reset()

    def get_elf(self, elf_or_uuid):
        """Return the path of the ELF file for a name or UUID, or None.

        '.elf' is appended when missing. Each entry of the search list is
        tried first as the file itself, then as a directory containing it.
        """
        if not elf_or_uuid.endswith('.elf'):
            elf_or_uuid += '.elf'
        for d in self._dirs:
            if d.endswith(elf_or_uuid) and os.path.isfile(d):
                return d
            elf = glob.glob(d + '/' + elf_or_uuid)
            if elf:
                return elf[0]
        return None

    def set_arch(self):
        """Detect the toolchain prefix from the first ELF of the dump.

        Runs file(1) on the ELF registered at index 0 and sets self._arch to
        'aarch64-linux-gnu-' or 'arm-linux-gnueabihf-'. The attribute is left
        untouched when the ELF cannot be found or is not recognised.
        """
        if self._arch:
            return
        elf = self.get_elf(self._elfs[0][0])
        if elf is None:
            return
        p = subprocess.Popen(['file', elf], stdout=subprocess.PIPE,
                             universal_newlines=True)
        # communicate() reads everything and reaps the child
        output = p.communicate()[0]
        lines = output.splitlines()
        first = lines[0] if lines else ''
        if 'ARM aarch64,' in first:
            self._arch = 'aarch64-linux-gnu-'
        elif 'ARM,' in first:
            self._arch = 'arm-linux-gnueabihf-'

    def arch_prefix(self, cmd):
        """Return cmd with the toolchain prefix, or '' if arch is unknown."""
        self.set_arch()
        if self._arch is None:
            return ''
        return self._arch + cmd

    @staticmethod
    def _reap(proc):
        """Stop a helper process, close our ends of its pipes and wait."""
        try:
            proc.terminate()
        except EnvironmentError:
            # The process is already gone; nothing to stop
            pass
        for pipe in (proc.stdin, proc.stdout):
            if pipe:
                try:
                    pipe.close()
                except EnvironmentError:
                    # E.g., broken pipe while flushing to a dead process
                    pass
        proc.wait()

    def _stop_addr2line(self):
        """Terminate the running addr2line (if any) and forget its ELF."""
        if self._addr2line:
            self._reap(self._addr2line)
            self._addr2line = None
        self._addr2line_elf_name = None

    def spawn_addr2line(self, elf_name):
        """Make sure an addr2line process is running for elf_name.

        A process already serving the same ELF is reused. On failure (ELF not
        found, unknown architecture) self._addr2line is left as None.
        """
        if elf_name is None:
            return
        if self._addr2line_elf_name == elf_name:
            return
        self._stop_addr2line()
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('addr2line')
        if not cmd:
            return
        self._addr2line = subprocess.Popen([cmd, '-f', '-p', '-e', elf],
                                           stdin=subprocess.PIPE,
                                           stdout=subprocess.PIPE,
                                           universal_newlines=True)
        self._addr2line_elf_name = elf_name

    # If addr falls into a region that maps a TA ELF file, return the load
    # address of that file.
    def elf_load_addr(self, addr):
        """Return the load address (hex string) of the ELF containing addr.

        Without a region table the address is assumed to belong to tee.elf
        and '0x0' is returned. With a region table, None is returned when no
        region that maps a known ELF contains the address.
        """
        if not self._regions:
            # tee.elf
            return '0x0'
        i_addr = int(addr, 16)
        for r in self._regions:
            r_addr = int(r[0], 16)
            r_size = int(r[1], 16)
            if r_addr <= i_addr < r_addr + r_size:
                # Found region
                elf_idx = r[2]
                if elf_idx is not None:
                    entry = self._elfs.get(int(elf_idx))
                    if entry is not None:
                        return entry[1]
        return None

    def elf_for_addr(self, addr):
        """Return the name (or UUID) of the ELF containing addr, or None."""
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if l_addr == '0x0':
            return 'tee.elf'
        i_l_addr = int(l_addr, 16)
        for e in self._elfs.values():
            if int(e[1], 16) == i_l_addr:
                return e[0]
        return None

    def subtract_load_addr(self, addr):
        """Convert addr into an offset relative to its ELF's load address.

        Returns a hex string, None if the ELF is unknown, or '' if the load
        address is greater than addr.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if int(l_addr, 16) > int(addr, 16):
            return ''
        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))

    def resolve(self, addr):
        """Return addr2line's description of addr.

        Returns '???' when the address cannot be looked up and '!!!' when
        talking to addr2line fails.
        """
        reladdr = self.subtract_load_addr(addr)
        self.spawn_addr2line(self.elf_for_addr(addr))
        if not reladdr or not self._addr2line:
            return '???'
        try:
            self._addr2line.stdin.write(reladdr + '\n')
            # Text-mode pipes are buffered: push the request out now
            self._addr2line.stdin.flush()
            ret = self._addr2line.stdout.readline().rstrip('\n')
        except IOError:
            ret = '!!!'
        return ret

    @staticmethod
    def _symbol_for_addr(nm_lines, target):
        """Find the symbol covering target in numerically sorted nm output.

        nm_lines is an iterable of 'addr [size] type name' lines and target
        an integer. Returns 'name', 'name+<decimal offset>' or ''.
        """
        prev_name = None
        prev_addr = 0
        prev_size = 0
        for line in nm_lines:
            fields = line.split()
            if len(fields) == 4:
                sym_addr, sym_size, _, name = fields
            elif len(fields) == 3:
                # Size is missing
                sym_addr, _, name = fields
                sym_size = '0'
            else:
                # E.g., undefined (external) symbols (line = "U symbol")
                continue
            try:
                iaddr = int(sym_addr, 16)
                isize = int(sym_size, 16)
            except ValueError:
                continue
            if iaddr == target:
                return name
            # The end bound is inclusive, so an address one past the end of
            # a symbol is still attributed to it
            if iaddr < target and iaddr + isize >= target:
                return name + '+' + str(target - iaddr)
            if iaddr > target:
                # Symbols are sorted, so nothing further can match. If the
                # previous symbol has no size, assume it extends up to here.
                if prev_name is not None and prev_size == 0:
                    return prev_name + '+' + str(target - prev_addr)
                return ''
            prev_name, prev_addr, prev_size = name, iaddr, isize
        return ''

    def symbol_plus_offset(self, addr):
        """Return 'symbol[+offset]' for addr using nm, or '' if unknown."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('nm')
        if not reladdr or not elf or not cmd:
            return ''
        nm = subprocess.Popen([cmd, '--numeric-sort', '--print-size', elf],
                              stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE,
                              universal_newlines=True)
        try:
            return self._symbol_for_addr(iter(nm.stdout.readline, ''),
                                         int(reladdr, 16))
        finally:
            self._reap(nm)

    @staticmethod
    def _parse_section_header(line):
        """Parse one 'objdump --section-headers' line.

        Returns (name, vma, size) with integer vma and size for a section
        line ('idx name size vma lma offset align'), None for anything else.
        """
        fields = line.split()
        if len(fields) != 7 or not fields[0].isdigit():
            return None
        try:
            return fields[1], int(fields[3], 16), int(fields[2], 16)
        except ValueError:
            return None

    @staticmethod
    def _section_for_addr(objdump_lines, target):
        """Return 'section[+decimal offset]' for the integer target or ''."""
        for line in objdump_lines:
            header = Symbolizer._parse_section_header(line)
            if header is None:
                continue
            name, vma, size = header
            if vma == target:
                return name
            if vma < target and vma + size >= target:
                return name + '+' + str(target - vma)
        return ''

    def section_plus_offset(self, addr):
        """Return 'section[+offset]' for addr using objdump, or ''."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not reladdr or not elf or not cmd:
            return ''
        objdump = subprocess.Popen([cmd, '--section-headers', elf],
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   universal_newlines=True)
        try:
            return self._section_for_addr(iter(objdump.stdout.readline, ''),
                                          int(reladdr, 16))
        finally:
            self._reap(objdump)

    def process_abort(self, line):
        """Return the abort line with symbol and section info inserted.

        The information is placed right after the faulting address. Returns
        '' when nothing could be resolved or line is not an abort message.
        """
        match = ABORT_ADDR_RE.search(line)
        if not match:
            return ''
        addr = match.group('addr')
        sym = self.symbol_plus_offset(addr)
        sec = self.section_plus_offset(addr)
        if not (sym or sec):
            return ''
        ret = line[:match.start('addr')] + addr
        if sym:
            ret += ' ' + sym
        if sec:
            ret += ' ' + sec
        return ret + line[match.end('addr'):]

    # Cache all ELF sections with the ALLOC flag
    def read_sections(self, elf_name):
        """Fill self._sections[elf_name] with the ELF's ALLOC sections.

        Each entry is [name, vma, size]. Nothing is done if the ELF was
        already read, cannot be found, or the architecture is unknown.
        """
        if elf_name is None:
            return
        if elf_name in self._sections:
            return
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not elf or not cmd:
            return
        sections = self._sections[elf_name] = []
        objdump = subprocess.Popen([cmd, '--section-headers', elf],
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   universal_newlines=True)
        try:
            # objdump prints two lines per section: the header with name,
            # size and addresses, immediately followed by the flags.
            header = None
            for line in iter(objdump.stdout.readline, ''):
                parsed = self._parse_section_header(line)
                if parsed is not None:
                    header = parsed
                    continue
                if header is not None and 'ALLOC' in line:
                    name, vma, size = header
                    sections.append([name, vma, size])
                header = None
        finally:
            self._reap(objdump)

    def overlaps(self, section, addr, size):
        """Tell whether [addr, addr + size) intersects the given section.

        section is a [name, addr, size] list; empty ranges never overlap.
        """
        sec_addr = section[1]
        sec_size = section[2]
        if not size or not sec_size:
            return False
        return ((addr <= (sec_addr + sec_size - 1)) and
                ((addr + size - 1) >= sec_addr))

    def sections_in_region(self, addr, size, elf_idx):
        """Return ' name1 name2 ...' for the ELF sections in a region.

        addr and size are hex strings, elf_idx the index of the ELF mapped
        by the region. Returns '' when nothing is known.
        """
        addr = self.subtract_load_addr(addr)
        if not addr:
            return ''
        iaddr = int(addr, 16)
        isize = int(size, 16)
        entry = self._elfs.get(int(elf_idx))
        if entry is None or entry[0] is None:
            return ''
        elf = entry[0]
        self.read_sections(elf)
        if elf not in self._sections:
            return ''
        return ''.join(' ' + s[0] for s in self._sections[elf]
                       if self.overlaps(s, iaddr, isize))

    def reset(self):
        """Forget everything learnt from the dump processed so far."""
        self._call_stack_found = False
        self._stop_addr2line()
        self._arch = None
        self._saved_abort_line = ''
        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
        # {idx: [uuid, load_addr], ...}; TA entries also carry the saved line.
        # Load addresses are hex strings, like those parsed from the dump.
        self._elfs = {0: ["tee.elf", "0x0"]}

    def pretty_print_path(self, path):
        """Remove every occurrence of the strip path (and '/') from path."""
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path

    def _write_stack_entry(self, line, match):
        """Output a call stack line with the resolved location appended."""
        addr = match.group('addr')
        res = self.pretty_print_path(self.resolve(addr))
        self._out.write(line[:match.start('addr')] + addr + ' ' + res +
                        line[match.end('addr'):])

    def _write_memory_map(self):
        """Output the saved region table and ELF list, with annotations."""
        for r_addr, r_size, elf_idx, saved_line in self._regions:
            if elf_idx is None:
                self._out.write(saved_line)
            else:
                self._out.write(saved_line.strip() +
                                self.sections_in_region(r_addr, r_size,
                                                        elf_idx) +
                                '\n')
        # Sorted so that the ELF list comes out in index order
        for k in sorted(self._elfs):
            e = self._elfs[k]
            if len(e) >= 3:
                # TA executable or library
                self._out.write(e[2].strip())
                elf = self.get_elf(e[0])
                if elf:
                    rpath = os.path.realpath(elf)
                    path = self.pretty_print_path(rpath)
                    self._out.write(' (' + path + ')')
                self._out.write('\n')

    def write(self, line):
        """Process one line of the dump and emit the annotated output."""
        if self._call_stack_found:
            match = STACK_ADDR_RE.search(line)
            if match:
                self._write_stack_entry(line, match)
                return
            # End of the call stack: get ready for another dump
            self.reset()
        match = REGION_RE.search(line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            self._regions.append([match.group('addr'), match.group('size'),
                                  match.group('elf_idx'), line])
            return
        match = ELF_LIST_RE.search(line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            self._elfs[int(match.group('idx'))] = [match.group('uuid'),
                                                   match.group('load_addr'),
                                                   line]
            return
        if CALL_STACK_RE.search(line):
            self._call_stack_found = True
            self._write_memory_map()
            # Here is a good place to resolve the abort address because we
            # have all the information we need
            if self._saved_abort_line:
                self._out.write(self.process_abort(self._saved_abort_line))
        if ABORT_ADDR_RE.search(line):
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so we can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)

    def flush(self):
        """Flush the underlying output object."""
        self._out.flush()
418
419
def main():
    """Symbolize the dump read from stdin and print it to stdout."""
    args = get_args()
    # Flatten list in case -d is used several times *and* with multiple
    # arguments
    args.dirs = []
    for group in args.dir or []:
        for entry in group:
            args.dirs.append(entry)

    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)
    for line in sys.stdin:
        symbolizer.write(line)
    symbolizer.flush()


if __name__ == "__main__":
    main()
437