#!/usr/bin/perl -w
#
# Clean a patch file -- or directory of patch files -- of stealth whitespace.
# WARNING: this can be a highly destructive operation.  Use with caution.
#

use bytes;
use File::Basename;

# Default options
$max_width = 79;

# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Argument: a string (may contain embedded "\n" / "\r").
# Returns:  a copy in which every run of spaces that is immediately
#           followed by a tab has been replaced by however many tabs are
#           needed to reach the same 8-column tab stop.  Spaces that are
#           not followed by a tab are copied through unchanged.
sub clean_space_tabs($)
{
    no bytes;                   # Tab alignment depends on characters

    my ($li) = @_;
    my $lo  = '';               # output built so far
    my $pos = 0;                # column reached by what is already in $lo
    my $nsp = 0;                # spaces seen but not yet emitted

    foreach my $c (split(//, $li)) {
        if ($c eq ' ') {
            # Hold spaces back until we know whether a tab follows.
            $nsp++;
        } elsif ($c eq "\t") {
            # Column the tab lands on once the pending spaces are counted,
            # then the number of tab stops crossed from $pos to get there.
            my $npos = ($pos + $nsp + 8) & ~7;
            $lo .= "\t" x (($npos >> 3) - ($pos >> 3));
            $pos = $npos;
            $nsp = 0;
        } else {
            # Any other character: the pending spaces are real, emit them.
            $lo .= (' ' x $nsp) . $c;
            # Line terminators restart the column count.
            $pos = ($c eq "\n" || $c eq "\r") ? 0 : $pos + $nsp + 1;
            $nsp = 0;
        }
    }

    # Spaces at the very end of the string are kept as they are.
    return $lo . (' ' x $nsp);
}
# Everything from here on runs under strictures.
use strict;

# Option default is assigned near the top of the file; -width overrides it.
our $max_width;

# Compute the visual width of a string
#
# Argument: a string, possibly containing several "\n"-separated lines.
# Returns:  the largest column reached by any of those lines, with tabs
#           advancing to the next multiple of 8.
sub strwidth($) {
    no bytes;                   # Tab alignment depends on characters

    my ($li) = @_;
    my $pos  = 0;               # current column
    my $mlen = 0;               # widest line seen so far

    foreach my $c (split(//, $li)) {
        if ($c eq "\t") {
            $pos = ($pos + 8) & ~7;
        } elsif ($c eq "\n") {
            $mlen = $pos if ($pos > $mlen);
            $pos = 0;
        } else {
            $pos++;
        }
    }

    # The last line may not be newline-terminated.
    $mlen = $pos if ($pos > $mlen);
    return $mlen;
}

# Parse a unified-diff hunk header ("@@ -a,b +c,d @@ tail").
#
# Returns (old_start, old_count, new_start, new_count, tail) on a match,
# or an empty list otherwise.  A count that is omitted from the header
# is reported as 1.  The tail does not include the final newline.
sub parse_hunk_header
{
    my ($line) = @_;

    return unless
        ($line =~ /^\@\@\s+\-([0-9]+)(?:,([0-9]+))?\s+\+([0-9]+)(?:,([0-9]+))?\s\@\@(.*)$/);

    my ($mstart, $mlin, $pstart, $plin, $tail) = ($1, $2, $3, $4, $5);
    $mlin = 1 unless defined($mlin);
    $plin = 1 unless defined($plin);

    return ($mstart, $mlin, $pstart, $plin, $tail);
}

# Drop blank added lines ("+\n") from the end of a completed hunk.
#
# Argument: the hunk as a list of lines, header first.
# Returns:  ($adj, @lines) where $adj is the number of lines dropped and
#           @lines is the hunk (header first) with its "+" count reduced
#           by $adj.  The header is returned untouched when $adj is 0.
sub trim_hunk
{
    my ($header, @body) = @_;
    my @kept = ();
    my $adj  = 0;
    my $done = 0;

    # Walk backwards: blank additions are dropped until the first context
    # or added line is met; removed ("-") lines do not stop the scan.
    foreach my $hl (reverse(@body)) {
        if (!$done && $hl eq "+\n") {
            $adj++;             # Skip this line
            next;
        }
        $done = 1 if ($hl =~ /^[ +]/);
        push(@kept, $hl);
    }
    @kept = reverse(@kept);

    if ($adj) {
        my @hdr = parse_hunk_header($header);
        die "internal error: unparseable hunk header: $header"
            unless (@hdr);
        my ($mstart, $mlin, $pstart, $plin, $tail) = @hdr;

        $header = sprintf("\@\@ -%d,%d +%d,%d \@\@%s\n",
                          $mstart, $mlin, $pstart, $plin - $adj,
                          $tail);
    }

    return ($adj, $header, @kept);
}

my $name = basename($0);

my @files = ();

while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
        # -width needs a value; anything else starting with '-' is an error.
        if (($arg eq '-width' || $arg eq '-w') && @ARGV) {
            $max_width = shift(@ARGV)+0;
        } else {
            print STDERR "Usage: $name [-width #] files...\n";
            exit 1;
        }
    } else {
        push(@files, $arg);
    }
}

FILE:
foreach my $f ( @files ) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        next FILE;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        next FILE;
    }

    binmode($fh);

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    while (read($fh, $data, 65536)) {
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);
        next FILE;
    }

    if (!seek($fh, 0, 0)) {
        print STDERR "$name: Cannot rewind file: $f: $!\n";
        close($fh);
        next FILE;
    }

    my $lineno  = 0;
    my @lines   = ();           # cleaned file contents
    my @hunk_lines = ();        # current hunk, header first
    my $in_hunk = 0;
    my $changed = 0;            # set when the output differs from the input
    my $err     = 0;
    my ($minus_lines, $plus_lines) = (0, 0);    # lines still owed by the hunk

    while ( defined(my $line = <$fh>) ) {
        $lineno++;

        if (!$in_hunk) {
            my @hdr = parse_hunk_header($line);
            if (!@hdr) {
                push(@lines, $line);
                next;
            }

            ($minus_lines, $plus_lines) = @hdr[1, 3];
            if ($minus_lines || $plus_lines) {
                $in_hunk = 1;
                @hunk_lines = ($line);
            } else {
                # A header announcing zero lines on both sides is left
                # out of the output, so the file no longer matches.
                $changed = 1;
            }
            next;
        }

        # We're in a hunk

        if ($line =~ /^\+/) {
            $plus_lines--;

            my $text = substr($line, 1);
            $text =~ s/[ \t\r]*$//;             # Remove trailing spaces
            $text = clean_space_tabs($text);

            my $l_width = strwidth($text);
            if ($max_width && $l_width > $max_width) {
                print STDERR
                    "$f:$lineno: adds line exceeds $max_width ",
                    "characters ($l_width)\n";
            }

            # Compare content, not length: a cleaned line can have the
            # same number of bytes as the original.
            my $cleaned = '+'.$text;
            $changed = 1 if ($cleaned ne $line);
            push(@hunk_lines, $cleaned);
        } elsif ($line =~ /^\-/) {
            $minus_lines--;
            push(@hunk_lines, $line);
        } elsif ($line =~ /^ /) {
            $plus_lines--;
            $minus_lines--;
            push(@hunk_lines, $line);
        } else {
            print STDERR "$name: $f: malformed patch\n";
            $err = 1;
            last;
        }

        if ($plus_lines < 0 || $minus_lines < 0) {
            print STDERR "$name: $f: malformed patch\n";
            $err = 1;
            last;
        } elsif ($plus_lines == 0 && $minus_lines == 0) {
            # End of a hunk.  Process this hunk.
            my ($adj, @h) = trim_hunk(@hunk_lines);
            @hunk_lines = ();                   # Free memory

            $changed = 1 if ($adj);

            # Transfer to the output array
            push(@lines, @h);

            $in_hunk = 0;
        }
    }

    # Input ended in the middle of a hunk.  Errors already reported
    # inside the loop are not reported a second time.
    if ($in_hunk && !$err) {
        print STDERR "$name: $f: malformed patch\n";
        $err = 1;
    }

    if (!$err && $changed) {
        # Only write to the file if changed
        seek($fh, 0, 0)
            or die "$name: Failed to rewind modified file: $f: $!\n";
        print {$fh} @lines
            or die "$name: Failed to write modified file: $f: $!\n";

        # tell() reports failure as a negative position.
        my $where = tell($fh);
        if ( $where < 0 || !truncate($fh, $where) ) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
    }

    if (!close($fh)) {
        print STDERR "$name: Error closing file: $f: $!\n";
    }
}