xref: /optee_os/scripts/symbolize.py (revision 5a913ee74d3c71af2a2860ce8a4e7aeab2916f9b)
1#!/usr/bin/env python
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import glob
10import os
11import re
12import subprocess
13import sys
14
# Patterns used by Symbolizer.write() to recognize the interesting lines of
# an OP-TEE console dump.

# Header line announcing that a call stack follows
CALL_STACK_RE = re.compile('Call stack:')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(
    r'[UEIDFM]/(TC|LD):(\?*|[0-9]*) [0-9]* +(?P<addr>0x[0-9a-f]+)')
# Faulting address of an abort, e.g.:
# E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One entry of the region table: "region <n>: va <addr> pa <addr> size <size>"
# optionally followed by " flags <4 chars> " and an ELF index in brackets
REGION_RE = re.compile(r'region +[0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{4} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One entry of the ELF list: "[<idx>] <uuid> @ <load_addr>"
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f\-]+)')
# Header line announcing that a function graph follows
FUNC_GRAPH_RE = re.compile(r'Function graph')
# Any hexadecimal address found in a function graph line
GRAPH_ADDR_RE = re.compile(r'(?P<addr>0x[0-9a-f]+)')
# Closing brace of a function graph scope (such lines are output unchanged)
GRAPH_RE = re.compile(r'}')
29
# Free-form description shown verbatim after the option list in the --help
# output (it is passed as the argparse epilog by get_args()).
epilog = '''
This script reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info. If the CROSS_COMPILE environment
variable is set, it is used as a prefix to the binutils tools. That is, the
script will invoke $(CROSS_COMPILE)addr2line etc. If it is not set however,
the prefix will be determined automatically for each ELF file based on its
architecture (arm-linux-gnueabihf-, aarch64-linux-gnu-). The resulting command
is then expected to be found in the user's PATH.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D

Also, this script reads function graph generated for OP-TEE user TA from
/tmp/ftrace-<ta_uuid>.out file and resolves function addresses to corresponding
symbols.

Sample usage:

  $ cat /tmp/ftrace-<ta_uuid>.out | scripts/symbolize.py -d <ta_uuid>.elf
  <paste function graph here>
  ^D
'''
74
75
def get_args():
    """Parse the command line and return the argparse namespace.

    The namespace carries 'dir' (a list with one list of paths per -d
    occurrence, or None when -d was not given) and 'strip_path' (the current
    directory by default, None when -s is given without a value).
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed '
                'to decode a TEE Core or pseudo-TA abort, while '
                '<TA_uuid>.elf is required if a user-mode TA has '
                'crashed. For convenience, ELF files may also be '
                'given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: '
                  'current directory, use -s with no argument to show '
                  'full paths)')

    # The raw formatter keeps the line breaks of the epilog intact
    cli = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps or function graphs',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument('-d', '--dir', nargs='+', action='append',
                     help=dir_help)
    cli.add_argument('-s', '--strip_path', nargs='?', default=os.getcwd(),
                     help=strip_help)
    return cli.parse_args()
93
94
class Symbolizer(object):
    """Line filter that annotates OP-TEE console dumps with debug info.

    Feed the dump one line at a time to write(); the (possibly annotated)
    text is written to the 'out' stream given to the constructor. Addresses
    found in call stacks and function graphs are resolved with addr2line,
    abort addresses with nm and objdump.
    """

    def __init__(self, out, dirs, strip_path):
        """Create a symbolizer.

        out        -- object with write() and flush() receiving the output
        dirs       -- list of directories (or ELF file paths) to search
        strip_path -- prefix removed from displayed paths; a false value
                      disables stripping
        """
        self._out = out
        self._dirs = dirs
        self._strip_path = strip_path
        self._addr2line = None
        self.reset()

    def my_Popen(self, cmd):
        """Start 'cmd' (an argument list) with text-mode stdin/stdout pipes.

        Returns the subprocess.Popen object. If the command cannot be found,
        an error is printed on stderr and the script exits with status 1.
        Any other OSError is propagated instead of being silently dropped.
        """
        # Local import so that this does not depend on the file's import
        # block; the undocumented os.errno alias is gone in recent Pythons.
        import errno
        try:
            # universal_newlines makes the pipes carry text with both
            # Python 2 and Python 3
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError as e:
            if e.errno == errno.ENOENT:
                sys.stderr.write("*** Error: " + cmd[0] +
                                 ": command not found\n")
                sys.exit(1)
            raise

    @staticmethod
    def _reap(proc):
        """Close the pipes of 'proc', stop it if still running and reap it.

        Does nothing when 'proc' is None.
        """
        if proc is None:
            return
        for pipe in (proc.stdin, proc.stdout):
            if pipe is None:
                continue
            try:
                pipe.close()
            except IOError:
                # E.g. broken pipe while flushing data to a dead child
                pass
        if proc.poll() is None:
            proc.terminate()
        proc.wait()

    def _stop_addr2line(self):
        """Terminate the cached addr2line process, if any."""
        self._reap(self._addr2line)
        self._addr2line = None
        # Forget the name too, otherwise a later request for the same ELF
        # would wrongly be considered as already served
        self._addr2line_elf_name = None

    def get_elf(self, elf_or_uuid):
        """Return the path of the ELF file for a file name or TA UUID.

        '.elf' is appended when missing. Each entry of the search list is
        tried first as the ELF file itself, then as a directory containing
        it. Returns None when nothing is found.
        """
        if not elf_or_uuid.endswith('.elf'):
            elf_or_uuid += '.elf'
        for d in self._dirs:
            if d.endswith(elf_or_uuid) and os.path.isfile(d):
                return d
            elf = glob.glob(d + '/' + elf_or_uuid)
            if elf:
                return elf[0]
        return None

    def set_arch(self):
        """Determine the binutils prefix (self._arch) if not known yet.

        CROSS_COMPILE is used when set to a non-empty value, otherwise the
        prefix is guessed by running 'file' on the ELF with index 0.
        """
        if self._arch:
            return
        self._arch = os.getenv('CROSS_COMPILE')
        if self._arch:
            return
        elf = self.get_elf(self._elfs[0][0])
        if elf is None:
            return
        p = self.my_Popen(['file', elf])
        # communicate() also closes the pipes and reaps the child
        output = p.communicate()[0] or ''
        first_line = output.partition('\n')[0]
        if 'ARM aarch64,' in first_line:
            self._arch = 'aarch64-linux-gnu-'
        elif 'ARM,' in first_line:
            self._arch = 'arm-linux-gnueabihf-'

    def arch_prefix(self, cmd):
        """Return 'cmd' with the toolchain prefix, '' if it is unknown."""
        self.set_arch()
        if self._arch is None:
            return ''
        return self._arch + cmd

    def spawn_addr2line(self, elf_name):
        """Make sure self._addr2line is an addr2line process for 'elf_name'.

        Nothing is done when 'elf_name' is None or already being served.
        Otherwise the previous process is stopped; self._addr2line stays None
        if the ELF file or the toolchain prefix cannot be determined.
        """
        if elf_name is None:
            return
        if self._addr2line_elf_name == elf_name:
            return
        self._stop_addr2line()
        elf = self.get_elf(elf_name)
        if not elf:
            return
        cmd = self.arch_prefix('addr2line')
        if not cmd:
            return
        self._addr2line = self.my_Popen([cmd, '-f', '-p', '-e', elf])
        self._addr2line_elf_name = elf_name

    # If addr falls into a region that maps a TA ELF file, return the load
    # address of that file.
    def elf_load_addr(self, addr):
        """Return the load address (hex string) of the ELF containing 'addr'.

        Without any region information '0x0' (tee.elf) is returned. With
        region information, None is returned when no region tagged with a
        known ELF index contains the address.
        """
        if not self._regions:
            # tee.elf
            return '0x0'
        i_addr = int(addr, 16)
        for r in self._regions:
            r_addr = int(r[0], 16)
            r_size = int(r[1], 16)
            if r_addr <= i_addr < r_addr + r_size:
                # Found region
                elf_idx = r[2]
                if elf_idx is None:
                    continue
                entry = self._elfs.get(int(elf_idx))
                if entry is not None:
                    return entry[1]
        return None

    def elf_for_addr(self, addr):
        """Return the name (or UUID) of the ELF containing 'addr', or None."""
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if l_addr == '0x0':
            return 'tee.elf'
        i_l_addr = int(l_addr, 16)
        for e in self._elfs.values():
            if int(e[1], 16) == i_l_addr:
                return e[0]
        return None

    def subtract_load_addr(self, addr):
        """Return 'addr' relative to the load address of its ELF.

        The result is a hex string, None when the ELF is unknown or '' when
        the load address is greater than 'addr'.
        """
        l_addr = self.elf_load_addr(addr)
        if l_addr is None:
            return None
        if int(l_addr, 16) > int(addr, 16):
            return ''
        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))

    def resolve(self, addr):
        """Return the addr2line description of 'addr'.

        '???' is returned when the address cannot be looked up and '!!!'
        when talking to addr2line fails.
        """
        reladdr = self.subtract_load_addr(addr)
        self.spawn_addr2line(self.elf_for_addr(addr))
        if not reladdr or not self._addr2line:
            return '???'
        try:
            self._addr2line.stdin.write(reladdr + '\n')
            # The pipe may be buffered: addr2line must see the request
            # before we block waiting for its answer
            self._addr2line.stdin.flush()
            ret = self._addr2line.stdout.readline().rstrip('\n')
        except IOError:
            ret = '!!!'
        return ret

    @staticmethod
    def _symbol_at(nm_lines, ireladdr):
        """Look 'ireladdr' up in 'nm --numeric-sort --print-size' output.

        Returns 'name' on an exact match, 'name+<decimal offset>' when the
        address lies inside a symbol (or after a symbol of unknown size and
        before the next one), '' otherwise.
        """
        prev = None  # (address, size, name) of the preceding symbol
        for line in nm_lines:
            fields = line.split()
            if len(fields) == 4:
                saddr, ssize, _, name = fields
            elif len(fields) == 3:
                # Size is missing
                saddr, _, name = fields
                ssize = '0'
            else:
                # E.g., undefined (external) symbols (line = "U symbol")
                continue
            try:
                iaddr = int(saddr, 16)
                isize = int(ssize, 16)
            except ValueError:
                continue
            if iaddr == ireladdr:
                return name
            # The symbol covers [iaddr, iaddr + isize): the end is excluded
            # so that the first byte of the next symbol is not claimed
            if iaddr < ireladdr < iaddr + isize:
                return name + '+' + str(ireladdr - iaddr)
            if iaddr > ireladdr:
                # Symbols are sorted by address, no later one can match.
                # Fall back to the previous symbol if its size is unknown.
                if prev is not None and prev[1] == 0:
                    return prev[2] + '+' + str(ireladdr - prev[0])
                return ''
            prev = (iaddr, isize, name)
        return ''

    def symbol_plus_offset(self, addr):
        """Return '<symbol>[+<offset>]' for 'addr' using nm, '' if unknown."""
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('nm')
        if not reladdr or not elf or not cmd:
            return ''
        nm = self.my_Popen([cmd, '--numeric-sort', '--print-size', elf])
        try:
            return self._symbol_at(nm.stdout, int(reladdr, 16))
        finally:
            self._reap(nm)

    @staticmethod
    def _section_at(objdump_lines, iaddr):
        """Look 'iaddr' up in 'objdump --section-headers' output.

        Returns 'name' when the address is the start of a section,
        'name+<decimal offset>' when it lies inside one, '' otherwise.
        """
        for line in objdump_lines:
            fields = line.split()
            # Section lines read: idx name size vma lma file-offset align
            if len(fields) != 7:
                continue
            name, size, vma = fields[1:4]
            try:
                ivma = int(vma, 16)
                isize = int(size, 16)
            except ValueError:
                # Not a section line after all (e.g. a long list of flags)
                continue
            if ivma == iaddr:
                return name
            if ivma < iaddr < ivma + isize:
                return name + '+' + str(iaddr - ivma)
        return ''

    def section_plus_offset(self, addr):
        """Return '<section>[+<offset>]' for 'addr' using objdump.

        '' is returned when the section cannot be determined.
        """
        reladdr = self.subtract_load_addr(addr)
        elf_name = self.elf_for_addr(addr)
        if elf_name is None:
            return ''
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not reladdr or not elf or not cmd:
            return ''
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            return self._section_at(objdump.stdout, int(reladdr, 16))
        finally:
            self._reap(objdump)

    def process_abort(self, line):
        """Return the abort line annotated with symbol and section.

        'line' must match ABORT_ADDR_RE. '' is returned when neither the
        symbol nor the section of the abort address can be found.
        """
        match = re.search(ABORT_ADDR_RE, line)
        addr = match.group('addr')
        post = match.end('addr')
        sym = self.symbol_plus_offset(addr)
        sec = self.section_plus_offset(addr)
        if not (sym or sec):
            return ''
        ret = line[:post]
        if sym:
            ret += ' ' + sym
        if sec:
            ret += ' ' + sec
        return ret + line[post:]

    @staticmethod
    def _alloc_sections(objdump_lines):
        """Extract ALLOC sections from 'objdump --section-headers' output.

        Returns a list of [name, vma, size] entries. Each section is
        described by a line of seven fields starting with its index and
        followed by a line listing its flags.
        """
        sections = []
        pending = None  # section waiting for its flags line
        for line in objdump_lines:
            fields = line.split()
            if len(fields) == 7 and fields[0].isdigit():
                try:
                    pending = [fields[1], int(fields[3], 16),
                               int(fields[2], 16)]
                except ValueError:
                    pending = None
            elif pending is not None:
                if 'ALLOC' in line:
                    sections.append(pending)
                pending = None
        return sections

    # Return all ELF sections with the ALLOC flag
    def read_sections(self, elf_name):
        """Cache the ALLOC sections of 'elf_name' in self._sections.

        Nothing is cached when the ELF file or the toolchain prefix cannot
        be determined.
        """
        if elf_name is None:
            return
        if elf_name in self._sections:
            return
        elf = self.get_elf(elf_name)
        cmd = self.arch_prefix('objdump')
        if not elf or not cmd:
            return
        objdump = self.my_Popen([cmd, '--section-headers', elf])
        try:
            self._sections[elf_name] = self._alloc_sections(objdump.stdout)
        finally:
            self._reap(objdump)

    def overlaps(self, section, addr, size):
        """Tell if [addr, addr + size) intersects 'section'.

        'section' is a [name, addr, size] entry. Empty ranges never overlap.
        """
        sec_addr = section[1]
        sec_size = section[2]
        if not size or not sec_size:
            return False
        return ((addr <= (sec_addr + sec_size - 1)) and
                ((addr + size - 1) >= sec_addr))

    def sections_in_region(self, addr, size, elf_idx):
        """Return the names of the ELF sections found in a memory region.

        'addr' and 'size' are hex strings and 'elf_idx' is the index of the
        ELF mapped by the region. Each name is preceded by a space; '' is
        returned when the information is not available.
        """
        addr = self.subtract_load_addr(addr)
        if not addr:
            return ''
        iaddr = int(addr, 16)
        isize = int(size, 16)
        entry = self._elfs.get(int(elf_idx))
        if entry is None or entry[0] is None:
            return ''
        elf = entry[0]
        self.read_sections(elf)
        if elf not in self._sections:
            return ''
        return ''.join(' ' + s[0] for s in self._sections[elf]
                       if self.overlaps(s, iaddr, isize))

    def reset(self):
        """Forget everything learnt from the dump processed so far."""
        self._call_stack_found = False
        self._stop_addr2line()
        self._arch = None
        self._saved_abort_line = ''
        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
        # {idx: [uuid, load_addr], ...} (entries read from the ELF list
        # carry the original line as third element). Load addresses are
        # hex strings, including the one of the default entry.
        self._elfs = {0: ["tee.elf", "0x0"]}
        self._func_graph_found = False
        self._func_graph_skip_line = True

    def pretty_print_path(self, path):
        """Return 'path' with all occurrences of the strip path removed."""
        if self._strip_path:
            return re.sub(re.escape(self._strip_path) + '/*', '', path)
        return path

    def _write_memory_map(self):
        """Output the saved region table and ELF list with annotations."""
        for r_addr, r_size, elf_idx, saved_line in self._regions:
            if elf_idx is None:
                self._out.write(saved_line)
            else:
                self._out.write(saved_line.strip() +
                                self.sections_in_region(r_addr, r_size,
                                                        elf_idx) +
                                '\n')
        for e in self._elfs.values():
            if len(e) < 3:
                # Default entry, not read from the dump
                continue
            # TA executable or library
            self._out.write(e[2].strip())
            elf = self.get_elf(e[0])
            if elf:
                rpath = os.path.realpath(elf)
                self._out.write(' (' + self.pretty_print_path(rpath) + ')')
            self._out.write('\n')

    def write(self, line):
        """Process one line of the dump and output the result.

        Region table and ELF list lines are held back until the call stack
        header is seen; they are then output with annotations, followed by
        the annotated abort line if one was saved.
        """
        if self._call_stack_found:
            match = re.search(STACK_ADDR_RE, line)
            if match:
                post = match.end('addr')
                res = self.resolve(match.group('addr'))
                res = self.pretty_print_path(res)
                self._out.write(line[:post] + ' ' + res + line[post:])
                return
            # End of the call stack: process the line as any other one
            self.reset()
        if self._func_graph_found:
            match = re.search(GRAPH_ADDR_RE, line)
            if match:
                pre, post = match.span('addr')
                # Keep the first word of the addr2line output only
                res = self.resolve(match.group('addr')).split(' ')[0]
                self._out.write(line[:pre] + res + line[post:])
                self._func_graph_skip_line = False
                return
            if re.search(GRAPH_RE, line):
                self._out.write(line)
                return
            if self._func_graph_skip_line:
                # Drop lines until the first address of the graph is seen
                return
            self.reset()
        match = re.search(REGION_RE, line)
        if match:
            # Region table: save info for later processing once
            # we know which UUID corresponds to which ELF index
            self._regions.append([match.group('addr'), match.group('size'),
                                  match.group('elf_idx'), line])
            return
        match = re.search(ELF_LIST_RE, line)
        if match:
            # ELF list: save info for later. Region table and ELF list
            # will be displayed when the call stack is reached
            i = int(match.group('idx'))
            self._elfs[i] = [match.group('uuid'), match.group('load_addr'),
                             line]
            return
        if re.search(CALL_STACK_RE, line):
            self._call_stack_found = True
            self._write_memory_map()
            # Here is a good place to resolve the abort address because we
            # have all the information we need
            if self._saved_abort_line:
                self._out.write(self.process_abort(self._saved_abort_line))
        if re.search(FUNC_GRAPH_RE, line):
            self._func_graph_found = True
        if re.search(ABORT_ADDR_RE, line):
            self.reset()
            # At this point the arch and TA load address are unknown.
            # Save the line so we can translate the abort address later.
            self._saved_abort_line = line
        self._out.write(line)

    def flush(self):
        """Flush the output stream."""
        self._out.flush()
468
469
def main():
    """Symbolize the dump read from stdin and print it on stdout."""
    args = get_args()
    # -d can be given several times *and* with multiple arguments, in which
    # case argparse produces a list of lists: merge them into a flat list
    args.dirs = []
    for paths in args.dir or []:
        args.dirs.extend(paths)

    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)
    for line in sys.stdin:
        symbolizer.write(line)
    symbolizer.flush()


if __name__ == "__main__":
    main()
487