xref: /optee_os/scripts/symbolize.py (revision f9089765b4360daae4ab64fd150638c81d805e95)
1#!/usr/bin/env python
2# SPDX-License-Identifier: BSD-2-Clause
3#
4# Copyright (c) 2017, Linaro Limited
5#
6
7
8import argparse
9import glob
10import os
11import re
12import subprocess
13import sys
14
# Marks the line that introduces a call stack dump
CALL_STACK_RE = re.compile(r'Call stack:')
# This gets the address from lines looking like this:
# E/TC:0  0x001044a8
STACK_ADDR_RE = re.compile(r'[UEIDFM]/T[AC]:.*(?P<addr>0x[0-9a-f]+)')
# Faulting address of an abort message, e.g.:
# E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
ABORT_ADDR_RE = re.compile(r'-abort at address (?P<addr>0x[0-9a-f]+)')
# One entry of the region table. The trailing 'flags' part and the ELF index
# in square brackets are both optional.
# All fragments are raw strings so that '\[' and '\-' reach the regex engine
# untouched instead of being (invalid) Python string escapes.
REGION_RE = re.compile(r'region [0-9]+: va (?P<addr>0x[0-9a-f]+) '
                       r'pa 0x[0-9a-f]+ size (?P<size>0x[0-9a-f]+)'
                       r'( flags .{6} (\[(?P<elf_idx>[0-9]+)\])?)?')
# One entry of the ELF list: '[<idx>] <uuid> @ <load address>'
ELF_LIST_RE = re.compile(r'\[(?P<idx>[0-9]+)\] (?P<uuid>[0-9a-f\-]+)'
                         r' @ (?P<load_addr>0x[0-9a-f\-]+)')
25
# Long help text passed to argparse as the 'epilog' of the usage message
# (see get_args()). It is printed verbatim, so its layout matters.
epilog = '''
This scripts reads an OP-TEE abort or panic message from stdin and adds debug
information to the output, such as '<function> at <file>:<line>' next to each
address in the call stack. Any message generated by OP-TEE and containing a
call stack can in principle be processed by this script. This currently
includes aborts and panics from the TEE core as well as from any TA.
The paths provided on the command line are used to locate the appropriate ELF
binary (tee.elf or Trusted Application). The GNU binutils (addr2line, objdump,
nm) are used to extract the debug info. If the CROSS_COMPILE environment
variable is set, it is used as a prefix to the binutils tools. That is, the
script will invoke $(CROSS_COMPILE)addr2line etc. If it is not set however,
the prefix will be determined automatically for each ELF file based on its
architecture (arm-linux-gnueabihf-, aarch64-linux-gnu-). The resulting command
is then expected to be found in the user's PATH.

OP-TEE abort and panic messages are sent to the secure console. They look like
the following:

  E/TC:0 User TA data-abort at address 0xffffdecd (alignment fault)
  ...
  E/TC:0 Call stack:
  E/TC:0  0x4000549e
  E/TC:0  0x40001f4b
  E/TC:0  0x4000273f
  E/TC:0  0x40005da7

Inspired by a script of the same name by the Chromium project.

Sample usage:

  $ scripts/symbolize.py -d out/arm-plat-hikey/core -d ../optee_test/out/ta/*
  <paste whole dump here>
  ^D
'''
60
61
def get_args():
    """Parse the command line and return the argparse namespace.

    '-d/--dir' may be repeated and takes one or more values each time, so
    the resulting 'dir' attribute is a list of lists (or None when the
    option is absent). '-s/--strip_path' defaults to the current working
    directory and becomes None when given without a value.
    """
    dir_help = ('Search for ELF file in DIR. tee.elf is needed '
                'to decode a TEE Core or pseudo-TA abort, while '
                '<TA_uuid>.elf is required if a user-mode TA has '
                'crashed. For convenience, ELF files may also be '
                'given.')
    strip_help = ('Strip STRIP_PATH from file paths (default: '
                  'current directory, use -s with no argument to show '
                  'full paths)')

    arg_parser = argparse.ArgumentParser(
        description='Symbolizes OP-TEE abort dumps',
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('-d', '--dir', nargs='+', action='append',
                            help=dir_help)
    arg_parser.add_argument('-s', '--strip_path', nargs='?',
                            default=os.getcwd(), help=strip_help)
    return arg_parser.parse_args()
79
80
81class Symbolizer(object):
82    def __init__(self, out, dirs, strip_path):
83        self._out = out
84        self._dirs = dirs
85        self._strip_path = strip_path
86        self._addr2line = None
87        self.reset()
88
89    def my_Popen(self, cmd):
90        try:
91            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
92                                    stdout=subprocess.PIPE)
93        except OSError as e:
94            if e.errno == os.errno.ENOENT:
95                print >> sys.stderr, "*** Error:", cmd[0] + \
96                    ": command not found"
97                sys.exit(1)
98
99    def get_elf(self, elf_or_uuid):
100        if not elf_or_uuid.endswith('.elf'):
101            elf_or_uuid += '.elf'
102        for d in self._dirs:
103            if d.endswith(elf_or_uuid) and os.path.isfile(d):
104                return d
105            elf = glob.glob(d + '/' + elf_or_uuid)
106            if elf:
107                return elf[0]
108
109    def set_arch(self):
110        if self._arch:
111            return
112        self._arch = os.getenv('CROSS_COMPILE');
113        if self._arch:
114            return
115        elf = self.get_elf(self._elfs[0][0])
116        if elf is None:
117            return
118        p = subprocess.Popen(['file', self.get_elf(self._elfs[0][0])],
119                             stdout=subprocess.PIPE)
120        output = p.stdout.readlines()
121        p.terminate()
122        if 'ARM aarch64,' in output[0]:
123            self._arch = 'aarch64-linux-gnu-'
124        elif 'ARM,' in output[0]:
125            self._arch = 'arm-linux-gnueabihf-'
126
127    def arch_prefix(self, cmd):
128        self.set_arch()
129        if self._arch is None:
130            return ''
131        return self._arch + cmd
132
133    def spawn_addr2line(self, elf_name):
134        if elf_name is None:
135            return
136        if self._addr2line_elf_name is elf_name:
137            return
138        if self._addr2line:
139            self._addr2line.terminate
140            self._addr2line = None
141        elf = self.get_elf(elf_name)
142        if not elf:
143            return
144        cmd = self.arch_prefix('addr2line')
145        if not cmd:
146            return
147        self._addr2line = self.my_Popen([cmd, '-f', '-p', '-e', elf])
148        self._addr2line_elf_name = elf_name
149
150    # If addr falls into a region that maps a TA ELF file, return the load
151    # address of that file.
152    def elf_load_addr(self, addr):
153        if self._regions:
154            for r in self._regions:
155                r_addr = int(r[0], 16)
156                r_size = int(r[1], 16)
157                i_addr = int(addr, 16)
158                if (i_addr >= r_addr and i_addr < (r_addr + r_size)):
159                    # Found region
160                    elf_idx = r[2]
161                    if elf_idx is not None:
162                        return self._elfs[int(elf_idx)][1]
163            return None
164        else:
165            # tee.elf
166            return '0x0'
167
168    def elf_for_addr(self, addr):
169        l_addr = self.elf_load_addr(addr)
170        if l_addr is None:
171            return None
172        if l_addr is '0x0':
173            return 'tee.elf'
174        for k in self._elfs:
175            e = self._elfs[k]
176            if int(e[1], 16) == int(l_addr, 16):
177                return e[0]
178        return None
179
180    def subtract_load_addr(self, addr):
181        l_addr = self.elf_load_addr(addr)
182        if l_addr is None:
183            return None
184        if int(l_addr, 16) > int(addr, 16):
185            return ''
186        return '0x{:x}'.format(int(addr, 16) - int(l_addr, 16))
187
188    def resolve(self, addr):
189        reladdr = self.subtract_load_addr(addr)
190        self.spawn_addr2line(self.elf_for_addr(addr))
191        if not reladdr or not self._addr2line:
192            return '???'
193        try:
194            print >> self._addr2line.stdin, reladdr
195            ret = self._addr2line.stdout.readline().rstrip('\n')
196        except IOError:
197            ret = '!!!'
198        return ret
199
200    def symbol_plus_offset(self, addr):
201        ret = ''
202        prevsize = 0
203        reladdr = self.subtract_load_addr(addr)
204        elf_name = self.elf_for_addr(addr)
205        if elf_name is None:
206            return ''
207        elf = self.get_elf(elf_name)
208        cmd = self.arch_prefix('nm')
209        if not reladdr or not elf or not cmd:
210            return ''
211        ireladdr = int(reladdr, 16)
212        nm = self.my_Popen([cmd, '--numeric-sort', '--print-size', elf])
213        for line in iter(nm.stdout.readline, ''):
214            try:
215                addr, size, _, name = line.split()
216            except ValueError:
217                # Size is missing
218                try:
219                    addr, _, name = line.split()
220                    size = '0'
221                except ValueError:
222                    # E.g., undefined (external) symbols (line = "U symbol")
223                    continue
224            iaddr = int(addr, 16)
225            isize = int(size, 16)
226            if iaddr == ireladdr:
227                ret = name
228                break
229            if iaddr < ireladdr and iaddr + isize >= ireladdr:
230                offs = ireladdr - iaddr
231                ret = name + '+' + str(offs)
232                break
233            if iaddr > ireladdr and prevsize == 0:
234                offs = iaddr + ireladdr
235                ret = prevname + '+' + str(offs)
236                break
237            prevsize = size
238            prevname = name
239        nm.terminate()
240        return ret
241
242    def section_plus_offset(self, addr):
243        ret = ''
244        reladdr = self.subtract_load_addr(addr)
245        elf_name = self.elf_for_addr(addr)
246        if elf_name is None:
247            return ''
248        elf = self.get_elf(elf_name)
249        cmd = self.arch_prefix('objdump')
250        if not reladdr or not elf or not cmd:
251            return ''
252        iaddr = int(reladdr, 16)
253        objdump = self.my_Popen([cmd, '--section-headers', elf])
254        for line in iter(objdump.stdout.readline, ''):
255            try:
256                idx, name, size, vma, lma, offs, algn = line.split()
257            except ValueError:
258                continue
259            ivma = int(vma, 16)
260            isize = int(size, 16)
261            if ivma == iaddr:
262                ret = name
263                break
264            if ivma < iaddr and ivma + isize >= iaddr:
265                offs = iaddr - ivma
266                ret = name + '+' + str(offs)
267                break
268        objdump.terminate()
269        return ret
270
271    def process_abort(self, line):
272        ret = ''
273        match = re.search(ABORT_ADDR_RE, line)
274        addr = match.group('addr')
275        pre = match.start('addr')
276        post = match.end('addr')
277        sym = self.symbol_plus_offset(addr)
278        sec = self.section_plus_offset(addr)
279        if sym or sec:
280            ret += line[:pre]
281            ret += addr
282            if sym:
283                ret += ' ' + sym
284            if sec:
285                ret += ' ' + sec
286            ret += line[post:]
287        return ret
288
289    # Return all ELF sections with the ALLOC flag
290    def read_sections(self, elf_name):
291        if elf_name is None:
292            return
293        if elf_name in self._sections:
294            return
295        elf = self.get_elf(elf_name)
296        cmd = self.arch_prefix('objdump')
297        if not elf or not cmd:
298            return
299        self._sections[elf_name] = []
300        objdump = self.my_Popen([cmd, '--section-headers', elf])
301        for line in iter(objdump.stdout.readline, ''):
302            try:
303                _, name, size, vma, _, _, _ = line.split()
304            except ValueError:
305                if 'ALLOC' in line:
306                    self._sections[elf_name].append([name, int(vma, 16),
307                                                     int(size, 16)])
308
309    def overlaps(self, section, addr, size):
310        sec_addr = section[1]
311        sec_size = section[2]
312        if not size or not sec_size:
313            return False
314        return ((addr <= (sec_addr + sec_size - 1)) and
315                ((addr + size - 1) >= sec_addr))
316
317    def sections_in_region(self, addr, size, elf_idx):
318        ret = ''
319        addr = self.subtract_load_addr(addr)
320        if not addr:
321            return ''
322        iaddr = int(addr, 16)
323        isize = int(size, 16)
324        elf = self._elfs[int(elf_idx)][0]
325        if elf is None:
326            return ''
327        self.read_sections(elf)
328        if elf not in self._sections:
329            return ''
330        for s in self._sections[elf]:
331            if self.overlaps(s, iaddr, isize):
332                ret += ' ' + s[0]
333        return ret
334
335    def reset(self):
336        self._call_stack_found = False
337        if self._addr2line:
338            self._addr2line.terminate()
339            self._addr2line = None
340        self._addr2line_elf_name = None
341        self._arch = None
342        self._saved_abort_line = ''
343        self._sections = {}  # {elf_name: [[name, addr, size], ...], ...}
344        self._regions = []   # [[addr, size, elf_idx, saved line], ...]
345        self._elfs = {0: ["tee.elf", 0]}  # {idx: [uuid, load_addr], ...}
346
347    def pretty_print_path(self, path):
348        if self._strip_path:
349            return re.sub(re.escape(self._strip_path) + '/*', '', path)
350        return path
351
    def write(self, line):
            """Process one line of the dump and write the result to the output.

            Lines are classified with the module-level regular expressions:
            - while inside a call stack, lines holding an address are written
              with the resolved location inserted after the address;
            - region table and ELF list lines are stored and NOT written
              until the 'Call stack:' line is reached;
            - abort lines are written as is and also saved, so that the
              abort address can be annotated once the call stack starts;
            - any other line is written unchanged.
            """
            if self._call_stack_found:
                match = re.search(STACK_ADDR_RE, line)
                if match:
                    addr = match.group('addr')
                    pre = match.start('addr')
                    post = match.end('addr')
                    self._out.write(line[:pre])
                    self._out.write(addr)
                    res = self.resolve(addr)
                    res = self.pretty_print_path(res)
                    self._out.write(' ' + res)
                    self._out.write(line[post:])
                    return
                else:
                    # First line without an address: the call stack is over,
                    # drop all state and handle the line like any other
                    self.reset()
            match = re.search(REGION_RE, line)
            if match:
                # Region table: save info for later processing once
                # we know which UUID corresponds to which ELF index
                addr = match.group('addr')
                size = match.group('size')
                elf_idx = match.group('elf_idx')
                self._regions.append([addr, size, elf_idx, line])
                return
            match = re.search(ELF_LIST_RE, line)
            if match:
                # ELF list: save info for later. Region table and ELF list
                # will be displayed when the call stack is reached
                i = int(match.group('idx'))
                self._elfs[i] = [match.group('uuid'), match.group('load_addr'),
                                 line]
                return
            match = re.search(CALL_STACK_RE, line)
            if match:
                self._call_stack_found = True
                if self._regions:
                    # Print the saved region lines; regions that map an ELF
                    # get the names of the sections they contain appended
                    for r in self._regions:
                        r_addr = r[0]
                        r_size = r[1]
                        elf_idx = r[2]
                        saved_line = r[3]
                        if elf_idx is None:
                            self._out.write(saved_line)
                        else:
                            self._out.write(saved_line.strip() +
                                            self.sections_in_region(r_addr,
                                                                    r_size,
                                                                    elf_idx) +
                                            '\n')
                if self._elfs:
                    # Print the saved ELF list lines, followed by the path of
                    # the matching ELF file when one was found
                    for k in self._elfs:
                        e = self._elfs[k]
                        # Only entries read from the dump carry a saved line;
                        # the built-in tee.elf entry has two items
                        if (len(e) >= 3):
                            # TA executable or library
                            self._out.write(e[2].strip())
                            elf = self.get_elf(e[0])
                            if elf:
                                rpath = os.path.realpath(elf)
                                path = self.pretty_print_path(rpath)
                                self._out.write(' (' + path + ')')
                            self._out.write('\n')
                # Here is a good place to resolve the abort address because we
                # have all the information we need
                if self._saved_abort_line:
                    self._out.write(self.process_abort(self._saved_abort_line))
            match = re.search(ABORT_ADDR_RE, line)
            if match:
                self.reset()
                # At this point the arch and TA load address are unknown.
                # Save the line so we can translate the abort address later.
                self._saved_abort_line = line
            # Not consumed above (this includes the 'Call stack:' and abort
            # lines themselves): pass the line through unchanged
            self._out.write(line)
425
426    def flush(self):
427        self._out.flush()
428
429
def main():
    """Symbolize the dump read from stdin and print it on stdout."""
    args = get_args()
    # Flatten list in case -d is used several times *and* with multiple
    # arguments; no -d at all yields an empty list
    args.dirs = [path for group in (args.dir or []) for path in group]
    symbolizer = Symbolizer(sys.stdout, args.dirs, args.strip_path)

    for line in sys.stdin:
        symbolizer.write(line)
    symbolizer.flush()


if __name__ == "__main__":
    main()
447