#!/usr/bin/perl -w
#
# Clean one or more text files of stealth whitespace:
#
#   - trailing spaces, tabs and carriage returns are removed from each line;
#   - spaces that immediately precede a tab are removed or turned into tabs;
#   - blank lines at the end of the file are dropped.
#
# Files are rewritten in place, and only if something actually changed.
# Arguments that are not plain files (directories, for instance) are
# reported and skipped; files containing a NUL byte are considered binary
# and left untouched.
#
# WARNING: this can be a highly destructive operation.  Use with caution.
#
# Usage: cleanfile [-width #] files...
#   -width #, -w #   warn about lines wider than # columns (default 79);
#                    a width of 0 disables the warning.
#

use strict;
use warnings;
use bytes;
use File::Basename;

# Default options
my $max_width = 79;

# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Takes a string and returns the cleaned copy.  Tab stops are assumed
# to be every 8 columns.  Spaces are held back until the next character
# is known: if it is a tab, the spaces are replaced by however many tabs
# are needed to land on the same tab stop; otherwise they are emitted
# unchanged.  "\n" and "\r" reset the column counter.
sub clean_space_tabs($)
{
    no bytes;                   # Tab alignment depends on characters

    my ($li) = @_;
    my $lo  = '';               # Output string
    my $pos = 0;                # Column of the output so far
    my $nsp = 0;                # Spaces seen but not yet emitted

    foreach my $c (split(//, $li)) {
        if ($c eq "\t") {
            # Column the tab would have reached with the spaces in place
            my $npos = ($pos + $nsp + 8) & ~7;
            # Number of tab stops between here and there
            my $ntab = ($npos >> 3) - ($pos >> 3);
            $lo .= "\t" x $ntab;
            $pos = $npos;
            $nsp = 0;
        } elsif ($c eq "\n" || $c eq "\r") {
            $lo .= (' ' x $nsp) . $c;
            $nsp = 0;
            $pos = 0;
        } elsif ($c eq ' ') {
            $nsp++;
        } else {
            $lo .= (' ' x $nsp) . $c;
            $pos += $nsp + 1;
            $nsp = 0;
        }
    }

    # Spaces at the very end of the input are kept as they are
    return $lo . (' ' x $nsp);
}

# Compute the visual width of a string
#
# Tabs advance to the next multiple of 8 columns and every other
# character except "\n" counts as one column.  If the string contains
# several lines, the width of the widest one is returned.
sub strwidth($) {
    no bytes;                   # Tab alignment depends on characters

    my ($li) = @_;
    my $pos  = 0;               # Column on the current line
    my $mlen = 0;               # Widest line seen so far

    foreach my $c (split(//, $li)) {
        if ($c eq "\t") {
            $pos = ($pos + 8) & ~7;
        } elsif ($c eq "\n") {
            $mlen = $pos if ($pos > $mlen);
            $pos = 0;
        } else {
            $pos++;
        }
    }

    $mlen = $pos if ($pos > $mlen);
    return $mlen;
}

my $name = basename($0);

my @files = ();

while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
        # The width option needs a value; without one fall through to
        # the usage message instead of silently disabling the check.
        if (($arg eq '-width' || $arg eq '-w') && @ARGV) {
            $max_width = shift(@ARGV) + 0;
        } else {
            print STDERR "Usage: $name [-width #] files...\n";
            exit 1;
        }
    } else {
        push(@files, $arg);
    }
}

foreach my $f (@files) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        next;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        next;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my ($data, $got);

    # read() returns 0 at end of file and undef on error
    while ($got = read($fh, $data, 65536)) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if (!defined($got)) {
        print STDERR "$name: Cannot read file: $f: $!\n";
        close($fh);
        next;
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);
        next;
    }

    if (!seek($fh, 0, 0)) {
        print STDERR "$name: Cannot rewind file: $f: $!\n";
        close($fh);
        next;
    }

    my $in_bytes    = 0;        # Size of the file as read
    my $out_bytes   = 0;        # Size of the cleaned contents
    my $blank_bytes = 0;        # Size of the pending blank lines

    my @blanks = ();            # Blank lines not yet known to be interior
    my @lines  = ();            # Cleaned contents to write back
    my $lineno = 0;

    while (defined(my $line = <$fh>)) {
        $lineno++;
        $in_bytes += length($line);
        $line =~ s/[ \t\r]*$//;         # Remove trailing spaces
        $line = clean_space_tabs($line);

        if ($line eq "\n") {
            # Hold blank lines back until a non-blank line follows
            push(@blanks, $line);
            $blank_bytes += length($line);
        } else {
            push(@lines, @blanks, $line);
            $out_bytes += $blank_bytes + length($line);
            @blanks = ();
            $blank_bytes = 0;
        }

        my $l_width = strwidth($line);
        if ($max_width && $l_width > $max_width) {
            print STDERR
                "$f:$lineno: line exceeds $max_width characters ($l_width)\n";
        }
    }

    # Any blanks at the end of the file are discarded

    # Every transformation above only ever removes bytes, so comparing
    # sizes is enough to tell whether the contents changed.
    if ($in_bytes != $out_bytes) {
        # Only write to the file if changed
        if (!seek($fh, 0, 0)) {
            die "$name: Failed to rewind modified file: $f: $!\n";
        }
        if (!print {$fh} @lines) {
            die "$name: Failed to write modified file: $f: $!\n";
        }

        # tell() reports failure as -1, not undef
        my $where = tell($fh);
        if ($where < 0 || !truncate($fh, $where)) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
    }

    # Buffered write errors may only surface when the handle is closed
    if (!close($fh)) {
        die "$name: Failed to close file: $f: $!\n";
    }
}