#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
#
# Generates a list of Control-Flow Integrity (CFI) jump table symbols
# for kallsyms.
#
# Copyright (C) 2021 Google LLC
#
# Usage: $0 [--module] binary
#
# The READELF, OBJDUMP and NM environment variables must name the
# corresponding tools.
#
# Without --module, the script prints one "<addr> t <name>.cfi_jt" line for
# each jump table entry that has no symbol yet, followed by the output of
# "nm -n" minus the __typeid__*_global_addr aliases of those entries.
#
# With --module, it prints a linker script fragment that defines the missing
# <name>.cfi_jt symbols inside .text.

use strict;
use warnings;

## parameters
my $ismodule = 0;
my $file;

foreach (@ARGV) {
	if ($_ eq '--module') {
		$ismodule = 1;
	} elsif (!defined($file)) {
		$file = $_;
	} else {
		die "$0: usage $0 [--module] binary";
	}
}

# A binary is mandatory; without this check an undefined $file would be
# interpolated into the shell commands below.
die "$0: usage $0 [--module] binary" if (!defined($file));

## environment
my $readelf = $ENV{'READELF'} || die "$0: ERROR: READELF not set?";
my $objdump = $ENV{'OBJDUMP'} || die "$0: ERROR: OBJDUMP not set?";
my $nm      = $ENV{'NM'}      || die "$0: ERROR: NM not set?";

## jump table addresses
## Holds the "start" and "end" addresses found by find_cfi_jt() and, later,
## one "<entry address> => <symbol>" pair per jump table entry.
my $cfi_jt = {};
## text symbols, keyed by "<section>+<hex offset without leading zeros>"
my $text_symbols = {};

## parser state
use constant {
	UNKNOWN => 0,
	SYMBOL  => 1,
	HINT    => 2,
	BRANCH  => 3,
	RELOC   => 4
};

## trims leading zeros from a string
## Returns 0 when the string consisted of zeros only (or was empty).
sub trim_zeros {
	my ($n) = @_;
	$n =~ s/^0+//;
	$n = 0 if ($n eq '');
	return $n;
}

## finds __cfi_jt_* symbols from the binary to locate the start and end of the
## jump table
## Every "__cfi_jt_<name>" symbol is stored as $cfi_jt->{<name>} = <address>.
## Dies if either __cfi_jt_start or __cfi_jt_end is missing.
sub find_cfi_jt {
	open(my $fh, "\"$readelf\" --symbols \"$file\" 2>/dev/null | grep __cfi_jt_ |")
		or die "$0: ERROR: failed to execute \"$readelf\": $!";

	while (my $line = <$fh>) {
		chomp($line);

		my ($addr, $name) = $line =~ /\:.*([a-f0-9]{16}).*\s__cfi_jt_(.*)/;
		if (defined($addr) && defined($name)) {
			$cfi_jt->{$name} = $addr;
		}
	}

	close($fh);

	die "$0: ERROR: __cfi_jt_start symbol missing" if !exists($cfi_jt->{"start"});
	die "$0: ERROR: __cfi_jt_end symbol missing"   if !exists($cfi_jt->{"end"});
}

## State shared by the is_*() line classifiers below.  $last records which
## kind of line was recognized most recently; the other variables keep the
## values captured from those lines.
my $last = UNKNOWN;
my $last_symbol;
my $last_hint_addr;
my $last_branch_addr;
my $last_branch_target;
my $last_reloc_target;

## recognizes a "<16 hex digits> <symbol>:" label line in the disassembly
## Records the symbol name in $last_symbol.  Returns 1 on a match, else 0.
sub is_symbol {
	my ($line) = @_;
	my ($addr, $symbol) = $line =~ /^([a-f0-9]{16})\s<([^>]+)>\:/;

	if (defined($addr) && defined($symbol)) {
		$last = SYMBOL;
		$last_symbol = $symbol;
		return 1;
	}

	return 0;
}

## recognizes a "hint #..." instruction line
## Records its address in $last_hint_addr.  Returns 1 on a match, else 0.
sub is_hint {
	my ($line) = @_;
	my ($hint) = $line =~ /^\s*([a-f0-9]+)\:.*\s+hint\s+#/;

	if (defined($hint)) {
		$last = HINT;
		$last_hint_addr = $hint;
		return 1;
	}

	return 0;
}

## resolves a "<section>+0x<offset>" target to the symbol at that location
## Targets that are not of the form "<name>[+-]0x<hex>" are returned as is.
## For ".text", ".init.text" and ".exit.text" with a positive offset the
## name is looked up in $text_symbols (dies if there is no entry); for any
## other name the part before the offset is returned.
sub find_text_symbol {
	my ($target) = @_;

	my ($symbol, $expr, $offset) = $target =~ /^(\S*)([-\+])0x([a-f0-9]+)?$/;

	# $offset is an optional group, so it can be undefined even when the
	# pattern matched.
	if (!defined($symbol) || !defined($expr) || !defined($offset)) {
		return $target;
	}

	if ($symbol =~ /^\.((init|exit)\.)?text$/ && $expr eq '+') {
		# read_symbols() stores its keys with leading zeros trimmed
		$offset = trim_zeros($offset);
		my $actual = $text_symbols->{"$symbol+$offset"};

		if (!defined($actual)) {
			die "$0: unknown symbol at $symbol+0x$offset";
		}

		$symbol = $actual;
	}

	return $symbol;
}

## recognizes a "b", "jmp" or "jmpq" instruction with a "<target>" operand
## Records the resolved target in $last_branch_target and the address of the
## jump table entry in $last_branch_addr.  Returns 1 on a match, else 0.
sub is_branch {
	my ($line) = @_;
	my ($addr, $instr, $branch_target) = $line =~
		/^\s*([a-f0-9]+)\:.*(b|jmpq?)\s+0x[a-f0-9]+\s+<([^>]+)>/;

	if (defined($addr) && defined($instr) && defined($branch_target)) {
		# If the branch directly follows a hint instruction, the entry
		# starts at the hint rather than at the branch itself.
		if ($last == HINT) {
			$last_branch_addr = $last_hint_addr;
		} else {
			$last_branch_addr = $addr;
		}

		$last = BRANCH;
		$last_branch_target = find_text_symbol($branch_target);
		return 1;
	}

	return 0;
}

## recognizes the relocation line that directly follows a branch
## Records the resolved relocation target in $last_reloc_target.  Returns 1
## on a match, else 0 (always 0 unless the previous recognized line was a
## branch).
sub is_branch_reloc {
	my ($line) = @_;

	if ($last != BRANCH) {
		return 0;
	}

	my ($addr, $type, $reloc_target) =
		$line =~ /\s*([a-f0-9]{16})\:\s+R_(\S+)\s+(\S+)$/;

	if (defined($addr) && defined($type) && defined($reloc_target)) {
		$last = RELOC;
		$last_reloc_target = find_text_symbol($reloc_target);
		return 1;
	}

	return 0;
}

## walks through the jump table looking for branches and prints out a jump
## table symbol for each branch if one is missing
sub print_missing_symbols {
	my @symbols;

	open(my $fh, "\"$objdump\" -d -r " .
		"--start-address=0x" . $cfi_jt->{"start"} .
		" --stop-address=0x" . $cfi_jt->{"end"} .
		" \"$file\" 2>/dev/null |")
		or die "$0: ERROR: failed to execute \"$objdump\": $!";

	while (my $line = <$fh>) {
		chomp($line);

		# Both calls update the parser state as a side effect.
		if (is_symbol($line) || is_hint($line)) {
			next;
		}

		my $cfi_jt_symbol;

		if (is_branch($line)) {
			if ($ismodule) {
				next; # wait for the relocation
			}

			$cfi_jt_symbol = $last_branch_target;
		} elsif (is_branch_reloc($line)) {
			$cfi_jt_symbol = $last_reloc_target;
		} else {
			next;
		}

		# ignore functions with a canonical jump table
		if ($cfi_jt_symbol =~ /\.cfi$/) {
			next;
		}

		$cfi_jt_symbol .= ".cfi_jt";
		# remembered so that print_kallsyms() can drop aliases of this entry
		$cfi_jt->{$last_branch_addr} = $cfi_jt_symbol;

		if (defined($last_symbol) && $last_symbol eq $cfi_jt_symbol) {
			next; # already exists
		}

		# print out the symbol
		if ($ismodule) {
			push(@symbols, "\t\t$cfi_jt_symbol = . + 0x$last_branch_addr;");
		} else {
			push(@symbols, "$last_branch_addr t $cfi_jt_symbol");
		}
	}

	close($fh);

	if (!scalar(@symbols)) {
		return;
	}

	if ($ismodule) {
		print "SECTIONS {\n";
		# With -fpatchable-function-entry, LLD isn't happy without this
		print "\t__patchable_function_entries : { *(__patchable_function_entries) }\n";
		print "\t.text : {\n";
	}

	foreach my $entry (@symbols) {
		print "$entry\n";
	}

	if ($ismodule) {
		print "\t}\n}\n";
	}
}

## reads defined text symbols from the file
## Fills $text_symbols with "<section>+<offset>" => <symbol> for symbols in
## .text, .init.text and .exit.text.
sub read_symbols {
	open(my $fh, "\"$objdump\" --syms \"$file\" 2>/dev/null |")
		or die "$0: ERROR: failed to execute \"$objdump\": $!";

	while (my $line = <$fh>) {
		chomp($line);

		# llvm/tools/llvm-objdump/objdump.cpp:objdump::printSymbol
		# The fourth capture group is an optional field that is not needed.
		my ($addr, $debug, $section, undef, $symbol) = $line =~
			/^([a-f0-9]{16})\s.{5}(.).{2}(\S+)\s[a-f0-9]{16}(\s\.\S+)?\s(.*)$/;

		if (defined($addr) && defined($section) && defined($symbol)) {
			if (!($section =~ /^\.((init|exit)\.)?text$/)) {
				next;
			}
			# skip arm mapping symbols
			if ($symbol =~ /^\$[xd]\.\d+$/) {
				next;
			}
			# skip symbols flagged "d" in the captured flag column
			if (defined($debug) && $debug eq "d") {
				next;
			}

			$addr = trim_zeros($addr);
			$text_symbols->{"$section+$addr"} = $symbol;
		}
	}

	close($fh);
}

## prints out the remaining symbols from nm -n, filtering out the unnecessary
## __typeid__ symbols aliasing the jump table symbols we added
sub print_kallsyms {
	open(my $fh, "\"$nm\" -n \"$file\" 2>/dev/null |")
		or die "$0: ERROR: failed to execute \"$nm\": $!";

	while (my $line = <$fh>) {
		chomp($line);

		my ($addr, $symbol) = $line =~ /^([a-f0-9]{16})\s.\s(.*)$/;

		if (defined($addr) && defined($symbol)) {
			# drop duplicate __typeid__ symbols
			if ($symbol =~ /^__typeid__.*_global_addr$/ &&
				exists($cfi_jt->{$addr})) {
				next;
			}
		}

		# lines that did not parse are passed through unchanged
		print "$line\n";
	}

	close($fh);
}

## main
find_cfi_jt();

if ($ismodule) {
	read_symbols();
	print_missing_symbols();
} else {
	print_missing_symbols();
	print_kallsyms();
}