#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
#
# Clean one or more text files of stealth whitespace:
#   - trailing spaces, tabs and carriage returns on each line,
#   - spaces that immediately precede a tab,
#   - blank lines at the end of the file.
# Long lines are reported on STDERR but never modified.
#
# Only regular files are processed; a directory argument is reported as
# "not a file" and skipped, so expand directories before calling (e.g.
# with find).
#
# WARNING: this can be a highly destructive operation.  Files are
# rewritten in place.  Use with caution.
#
# Usage: cleanfile [-width #] files...
#

use strict;
use warnings;
use bytes;
use File::Basename;

# Default options
my $max_width = 79;

# Program name, used as a prefix in diagnostics
my $name = basename($0);

# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Takes one string (normally a single line) and returns the cleaned
# string.  The visual layout is preserved: a run of spaces followed by a
# tab is replaced by however many tabs reach the same tab stop.
sub clean_space_tabs($)
{
    no bytes;                   # Tab alignment depends on characters

    my ($in) = @_;
    my $out = '';
    my $pos = 0;                # Column reached by what is in $out
    my $nsp = 0;                # Spaces seen but not yet emitted

    for my $c (split //, $in) {
        if ($c eq "\t") {
            # Tab stop reached by the pending spaces plus this tab
            my $npos = ($pos + $nsp + 8) & ~7;
            # One tab per tab stop crossed; the pending spaces are dropped
            $out .= "\t" x (($npos >> 3) - ($pos >> 3));
            $pos = $npos;
            $nsp = 0;
        } elsif ($c eq " ") {
            # Hold spaces back until we know whether a tab follows
            $nsp++;
        } else {
            # Anything else makes the pending spaces significant
            $out .= " " x $nsp;
            $out .= $c;
            # A line terminator restarts the column count
            $pos = ($c eq "\n" || $c eq "\r") ? 0 : $pos + $nsp + 1;
            $nsp = 0;
        }
    }

    # Spaces at the very end of the string are kept as they are
    return $out . (" " x $nsp);
}

# Compute the visual width of a string
#
# Tabs advance to the next multiple of 8 columns.  If the string holds
# several newline-separated lines, the width of the widest one is
# returned.
sub strwidth($)
{
    no bytes;                   # Tab alignment depends on characters

    my ($str) = @_;
    my $pos = 0;                # Column within the current line
    my $widest = 0;             # Widest completed line so far

    for my $c (split //, $str) {
        if ($c eq "\t") {
            $pos = ($pos + 8) & ~7;
        } elsif ($c eq "\n") {
            $widest = $pos if ($pos > $widest);
            $pos = 0;
        } else {
            $pos++;
        }
    }

    # Account for a final line without a newline
    return ($pos > $widest) ? $pos : $widest;
}

# Print the usage message and exit with a failure status.
sub usage
{
    print STDERR "Usage: $name [-width #] files...\n";
    exit 1;
}

# Clean a single file in place.
#
#   $f      path of the file to clean
#   $width  maximum line width to report on STDERR; 0 disables the check
#
# Returns 1 if the file was rewritten, 0 if it was left untouched
# (already clean, not a regular file, unreadable, or binary).  Dies if
# the file cannot be rewritten once modification has started, since the
# file may then be damaged.
sub clean_file
{
    my ($f, $width) = @_;

    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        return 0;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        return 0;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    # read() yields 0 at end of file and undef on error; stop on either
    while (read($fh, $data, 65536)) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);
        return 0;
    }

    if (!seek($fh, 0, 0)) {
        print STDERR "$name: Cannot rewind file: $f: $!\n";
        close($fh);
        return 0;
    }

    my @lines   = ();           # Cleaned lines to write back
    my @blanks  = ();           # Blank lines not yet known to be interior
    my $changed = 0;            # True once the output differs from the input
    my $lineno  = 0;

    while (defined(my $line = <$fh>)) {
        $lineno++;
        my $orig = $line;

        $line =~ s/[ \t\r]*$//; # Remove trailing spaces
        $line = clean_space_tabs($line);

        # Compare contents, not lengths: replacing a space-tab pair by
        # two tabs changes the line without changing its byte count.
        $changed = 1 if ($line ne $orig);

        if ($line eq "\n") {
            # Hold blank lines back; they are only kept if text follows
            push(@blanks, $line);
        } else {
            push(@lines, @blanks, $line);
            @blanks = ();
        }

        my $l_width = strwidth($line);
        if ($width && $l_width > $width) {
            print STDERR
                "$f:$lineno: line exceeds $width characters ($l_width)\n";
        }
    }

    # Any blanks at the end of the file are discarded
    $changed = 1 if (@blanks);

    my $rewritten = 0;

    if ($changed) {
        # Only write to the file if changed
        seek($fh, 0, 0)
            or die "$name: Failed to rewind modified file: $f: $!\n";
        print {$fh} @lines
            or die "$name: Failed to write modified file: $f: $!\n";

        # tell() reports failure as -1, not undef
        my $where = tell($fh);
        if (!defined($where) || $where < 0 || !truncate($fh, $where)) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
        $rewritten = 1;
    }

    # Buffered write errors only surface at close time
    if (!close($fh) && $rewritten) {
        die "$name: Failed to write modified file: $f: $!\n";
    }

    return $rewritten;
}

my @files = ();

while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
        if ($arg eq '-width' || $arg eq '-w') {
            usage() unless (@ARGV);     # The option needs a value
            $max_width = shift(@ARGV) + 0;
        } else {
            usage();
        }
    } else {
        push(@files, $arg);
    }
}

foreach my $f (@files) {
    clean_file($f, $max_width);
}