xref: /OK3568_Linux_fs/kernel/scripts/cleanfile (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun#!/usr/bin/env perl
2*4882a593Smuzhiyun# SPDX-License-Identifier: GPL-2.0
3*4882a593Smuzhiyun#
4*4882a593Smuzhiyun# Clean a text file -- or directory of text files -- of stealth whitespace.
5*4882a593Smuzhiyun# WARNING: this can be a highly destructive operation.  Use with caution.
6*4882a593Smuzhiyun#
7*4882a593Smuzhiyun
8*4882a593Smuzhiyunuse warnings;
9*4882a593Smuzhiyunuse bytes;
10*4882a593Smuzhiyunuse File::Basename;
11*4882a593Smuzhiyun
# Built-in defaults; the -width/-w command-line option overrides this.
# A value of 0 turns the line-width warning off.
our $max_width = 79;
14*4882a593Smuzhiyun
# Normalize space-before-tab sequences in a string.  Spaces that are
# immediately followed by a tab are dropped, and extra tabs are emitted
# where needed so that the tab still lands on the same tab stop (stops
# are every 8 columns).  Spaces that are not followed by a tab are kept
# as they are.  Takes the text to clean and returns the cleaned copy.
sub clean_space_tabs($)
{
    no bytes;			# Columns are counted in characters

    my ($text) = @_;
    my $out = '';
    my $column = 0;		# Output column of what has been emitted
    my $pending = 0;		# Spaces seen but not yet emitted

    foreach my $ch (split(//, $text)) {
	if ($ch eq ' ') {
	    # Hold the space back until we know what follows it
	    $pending++;
	    next;
	}

	if ($ch eq "\t") {
	    # The tab absorbs the held-back spaces: jump to the tab stop
	    # the original text would have reached, using tabs only.
	    my $target = ($column + $pending + 8) & ~7;
	    $out .= "\t" x (($target >> 3) - ($column >> 3));
	    $column = $target;
	    $pending = 0;
	    next;
	}

	# Anything else: the held-back spaces are genuine, emit them
	$out .= (' ' x $pending) . $ch;
	if ($ch eq "\n" || $ch eq "\r") {
	    $column = 0;	# Line terminators restart the column count
	} else {
	    $column += $pending + 1;
	}
	$pending = 0;
    }

    # Spaces at the very end of the input are passed through unchanged
    return $out . (' ' x $pending);
}
54*4882a593Smuzhiyun
# Return the display width of a string: the largest column reached on
# any of its newline-separated lines.  A tab advances to the next
# multiple of 8 columns; every other character counts as one column.
sub strwidth($) {
    no bytes;			# Columns are counted in characters

    my ($text) = @_;
    my $widest = 0;

    # A limit of -1 keeps trailing empty segments; they have width 0
    foreach my $segment (split(/\n/, $text, -1)) {
	my $column = 0;

	foreach my $ch (split(//, $segment)) {
	    # ($column | 7) + 1 is the next tab stop past $column
	    $column = ($ch eq "\t") ? ($column | 7) + 1 : $column + 1;
	}

	$widest = $column if ($column > $widest);
    }

    return $widest;
}
79*4882a593Smuzhiyun
# Program name used as the prefix of diagnostics
$name = basename($0);

# Files to clean, in command-line order
@files = ();

# Parse the command line.  "-width N" (or "-w N") sets $max_width; any
# other argument starting with "-" is an error; everything else is taken
# as a file name.
while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
	if ($arg eq '-width' || $arg eq '-w') {
	    my $width = shift(@ARGV);

	    # A missing or non-numeric value would otherwise evaluate to
	    # 0 and silently disable the width check, so reject it.
	    if (!defined($width) || $width !~ /\A[0-9]+\z/) {
		print STDERR "$name: $arg requires a non-negative integer\n";
		print STDERR "Usage: $name [-width #] files...\n";
		exit 1;
	    }
	    $max_width = $width+0;
	} else {
	    print STDERR "Usage: $name [-width #] files...\n";
	    exit 1;
	}
    } else {
	push(@files, $arg);
    }
}
96*4882a593Smuzhiyun
# Clean every file named on the command line, in place.  For each file:
# skip it unless it is a plain file that can be opened read/write and
# contains no zero byte; strip trailing whitespace and clean space-tab
# sequences on every line; drop blank lines at the end of the file; warn
# about lines wider than $max_width; and rewrite the file only when the
# cleaned text is shorter than what was read.
foreach my $f ( @files ) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
	print STDERR "$f: not a file\n";
	next;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
	print STDERR "$name: Cannot open file: $f: $!\n";
	next;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my ($data, $nread);

    while ($nread = read($fh, $data, 65536)) {
	if ($data =~ /\0/) {
	    $is_binary = 1;
	    last;
	}
    }

    if (!defined($nread)) {
	# read() yields undef on error; leave an unscannable file alone
	print STDERR "$name: Cannot read file: $f: $!\n";
	close($fh);
	next;
    }

    if ($is_binary) {
	print STDERR "$name: $f: binary file\n";
	close($fh);		# Do not leave the handle open on skip
	next;
    }

    if (!seek($fh, 0, 0)) {
	print STDERR "$name: Cannot rewind file: $f: $!\n";
	close($fh);
	next;
    }

    my $in_bytes = 0;		# Bytes read from the file
    my $out_bytes = 0;		# Bytes queued in @lines for output
    my $blank_bytes = 0;	# Bytes held in @blanks

    my @blanks = ();		# Blank lines not yet known to be interior
    my @lines  = ();		# Cleaned lines to write back
    my $lineno = 0;

    while ( defined(my $line = <$fh>) ) {
	$lineno++;
	$in_bytes += length($line);
	$line =~ s/[ \t\r]*$//;		# Remove trailing spaces
	$line = clean_space_tabs($line);

	# An unterminated, whitespace-only last line is reduced to ''
	# by the stripping above; it is blank too and must not flush
	# the blank lines queued before it.
	if ( $line eq "\n" || $line eq '' ) {
	    push(@blanks, $line);
	    $blank_bytes += length($line);
	} else {
	    # A non-blank line proves the queued blanks are interior
	    push(@lines, @blanks);
	    $out_bytes += $blank_bytes;
	    push(@lines, $line);
	    $out_bytes += length($line);
	    @blanks = ();
	    $blank_bytes = 0;
	}

	my $l_width = strwidth($line);
	if ($max_width && $l_width > $max_width) {
	    print STDERR
		"$f:$lineno: line exceeds $max_width characters ($l_width)\n";
	}
    }

    # Any blanks at the end of the file are discarded

    my $modified = ($in_bytes != $out_bytes);

    if ($modified) {
	# Only write to the file if changed.  A failed rewind must be
	# fatal: writing anyway would append to the original contents.
	if (!seek($fh, 0, 0)) {
	    die "$name: Failed to rewind modified file: $f: $!\n";
	}
	if (!print $fh @lines) {
	    die "$name: Failed to write modified file: $f: $!\n";
	}

	# tell() reports failure as -1, not undef
	my $where = tell($fh);
	if ( !defined($where) || $where < 0 ||
	     !truncate($fh, $where) ) {
	    die "$name: Failed to truncate modified file: $f: $!\n";
	}
    }

    # Buffered write errors only surface when the handle is closed
    if (!close($fh) && $modified) {
	die "$name: Failed to write modified file: $f: $!\n";
    }
}
179