xref: /OK3568_Linux_fs/kernel/tools/testing/selftests/powerpc/eeh/eeh-basic.sh (revision 4882a59341e53eb6f0b4789bf948001014eff981)
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# Basic EEH selftest: checks that the test can run on this system, selects
# the PCI devices to break, breaks each of them and reports how many
# failed to recover.

# Exit status reported when the test cannot run and is skipped.
KSELFTESTS_SKIP=4

# Helper library, resolved relative to the current directory. Presumably
# this is what defines eeh_supported, pe_ok and eeh_one_dev -- none of
# them are defined in this file.
. ./eeh-functions.sh

# Without EEH support there is nothing to exercise.
eeh_supported || {
	echo "EEH not supported on this system, skipping"
	exit $KSELFTESTS_SKIP
}

# The debugfs EEH test files have to be available as well.
# NOTE(review): the test is only skipped when BOTH files are absent; if
# the helpers need each of them, this should probably skip when either
# one is missing -- confirm against eeh-functions.sh.
if ! { [ -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] || \
       [ -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; } ; then
	echo "debugfs EEH testing files are missing. Is debugfs mounted?"
	exit $KSELFTESTS_SKIP
fi

# Snapshot the PCI device list so it can be compared with the list seen
# once the test has finished. Nothing has been broken yet at this point,
# so bail out cleanly if the snapshot file cannot be created instead of
# carrying on with an empty file name.
pre_lspci="$(mktemp)" || {
	echo "Unable to create a temporary file for the lspci snapshot" >&2
	exit 1
}
lspci > "$pre_lspci"

# Bump the max freeze count to something absurd so we don't
# trip over it while breaking things.
echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes

# Record the devices that we break in here. Assuming everything
# goes to plan we should get them back once the recovery process
# is finished.
devices=""

# Build up a list of candidate devices. Only sysfs entries whose name
# ends in ".0" are considered.
for sysfs_dev in /sys/bus/pci/devices/*.0 ; do
	# When nothing matches, the glob is left as the literal pattern;
	# skip anything that is not an existing sysfs entry.
	[ -e "$sysfs_dev" ] || continue
	dev="${sysfs_dev##*/}"

	# skip bridges since we can't recover them (yet...)
	if [ -e "$sysfs_dev/pci_bus" ] ; then
		echo "$dev, Skipped: bridge"
		continue
	fi

	# Skip VFs for now since we don't have a reliable way
	# to break them.
	if [ -e "$sysfs_dev/physfn" ] ; then
		echo "$dev, Skipped: virtfn"
		continue
	fi

	# Only resolve the driver link when a driver is actually bound, so
	# that driverless devices don't make realpath/basename complain.
	if [ -e "$sysfs_dev/driver" ] && \
	   [ "ahci" = "$(basename "$(realpath "$sysfs_dev/driver")")" ] ; then
		echo "$dev, Skipped: ahci doesn't support recovery"
		continue
	fi

	# Don't inject errors into an already-frozen PE. This happens with
	# PEs that contain multiple PCI devices (e.g. multi-function cards)
	# and injecting new errors during the recovery process will probably
	# result in the recovery failing and the device being marked as
	# failed.
	if ! pe_ok "$dev" ; then
		echo "$dev, Skipped: Bad initial PE state"
		continue
	fi

	echo "$dev, Added"

	# Add to the list of devices to check
	devices="$devices $dev"
done

# Number of whitespace-separated entries in the candidate list.
dev_count="$(printf '%s\n' $devices | wc -w)"
echo "Found ${dev_count} breakable devices..."

# Break each candidate in turn. A device counts as failed either when its
# PE is not ok by the time its turn comes, or when eeh_one_dev returns
# non-zero for it.
failed=0
for dev in $devices ; do
	echo "Breaking $dev..."

	if pe_ok "$dev" ; then
		eeh_one_dev "$dev" || failed=$((failed + 1))
	else
		# Leave the device alone, but still count it as a failure.
		echo "Skipping $dev, Initial PE state is not ok"
		failed=$((failed + 1))
	fi
done

# Summary, followed by a unified diff of the PCI device list saved before
# the test against the current one. The diff is informational only; its
# result does not influence the exit status.
echo "$failed devices failed to recover ($dev_count tested)"
lspci | diff -u "$pre_lspci" -
rm -f "$pre_lspci"

# Exit with the status of the comparison: 0 only when no device failed.
[ "$failed" -eq 0 ]
exit