#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# Copyright 2015, Daniel Axtens, IBM Corporation
#
# For every chip/core pair reported in the OPAL message log, inject a
# RegFile recoverable error into the Core FIR with putscom, then poll dmesg
# until the expected number of "Harmless Hypervisor Maintenance interrupt"
# messages has appeared (or about a minute has passed).
#
# Requires getscom/putscom (skiboot external/xscom-utils) either in the
# current directory or in $PATH, plus ppc64_cpu and read access to dmesg
# and /sys/firmware/opal/msglog.
#
# Exit status: 0 when every injection produced the expected HMIs,
#              1 on missing tools, a non-zero FIR, a failed injection or
#                a timeout.

# do we have ./getscom, ./putscom?
if [ -x ./getscom ] && [ -x ./putscom ]; then
	GETSCOM=./getscom
	PUTSCOM=./putscom
# Both tools must be present: checking only getscom would leave PUTSCOM
# empty and turn the injection below into a bogus "-c ..." command.
elif command -v getscom > /dev/null && command -v putscom > /dev/null; then
	GETSCOM=$(command -v getscom)
	PUTSCOM=$(command -v putscom)
else
	cat <<EOF
Can't find getscom/putscom in . or \$PATH.
See https://github.com/open-power/skiboot.
The tool is in external/xscom-utils
EOF
	exit 1
fi

# We will get 8 HMI events per injection
# todo: deal with things being offline
expected_hmis=8

# Polling budget: 12 polls, 5 seconds apart, i.e. the "1 min" quoted in
# the timeout message below.
max_polls=12
poll_interval=5

# Print the number of harmless-HMI messages currently present in dmesg.
COUNT_HMIS() {
	dmesg | grep -c 'Harmless Hypervisor Maintenance interrupt'
}

# massively expand snooze delay, allowing injection on all cores
ppc64_cpu --smt-snooze-delay=1000000000

# when we exit, restore it
trap "ppc64_cpu --smt-snooze-delay=100" 0
# Turn HUP/INT/TERM into a normal exit so the restore above runs exactly
# once and the script does not carry on injecting after the signal.
trap 'exit 1' 1 2 15

# for each chip+core combination
# todo - less fragile parsing
#
# Each matched line looks like "OCC: Chip <chip> Core <core>", so the chip
# is the third field and the core the fifth.
#
# The loop runs in a pipeline, so "exit 1" inside it only leaves the loop's
# subshell. The pipeline is the last command of the script, which makes the
# loop's status the script's exit status.
grep -Eo 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
while read -r _occ _chip_label chip _core_label core; do
	fir="0x1${core}013100"

	# verify that Core FIR is zero as expected
	if [ "$("$GETSCOM" -c "0x${chip}" "$fir")" != 0 ]; then
		echo "FIR was not zero before injection for chip $chip, core $core. Aborting!"
		echo "Result of $GETSCOM -c 0x${chip} $fir:"
		"$GETSCOM" -c "0x${chip}" "$fir"
		echo "If you get a -5 error, the core may be in idle state. Try stress-ng."
		echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0"
		exit 1
	fi

	# keep track of the number of HMIs handled
	old_hmis=$(COUNT_HMIS)
	target_hmis=$((old_hmis + expected_hmis))

	# do injection, adding a marker to dmesg for clarity
	echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg
	# inject a RegFile recoverable error
	if ! "$PUTSCOM" -c "0x${chip}" "$fir" 2000000000000000 > /dev/null; then
		echo "Error injecting. Aborting!"
		exit 1
	fi

	# now we want to wait for all the HMIs to be processed
	# we expect one per thread on the core
	polls=0
	new_hmis=$(COUNT_HMIS)
	while [ "$new_hmis" -lt "$target_hmis" ] && [ "$polls" -lt "$max_polls" ]; do
		echo "Seen $((new_hmis - old_hmis)) HMI(s) out of $expected_hmis expected, sleeping"
		sleep "$poll_interval"
		polls=$((polls + 1))
		new_hmis=$(COUNT_HMIS)
	done

	# Decide on the HMI count, not on the poll counter: the events may
	# well show up on the very last poll, which is still a success.
	if [ "$new_hmis" -lt "$target_hmis" ]; then
		echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting."
		exit 1
	fi
	echo "Processed $expected_hmis events; presumed success. Check dmesg."
	echo ""
done