xref: /utopia/UTPA2-700.0.x/projects/build/scripts/cleanfile (revision 53ee8cc121a030b8d368113ac3e966b4705770ef)
1*53ee8cc1Swenshuai.xi#!/usr/bin/perl -w
2*53ee8cc1Swenshuai.xi#
3*53ee8cc1Swenshuai.xi# Clean a text file -- or directory of text files -- of stealth whitespace.
4*53ee8cc1Swenshuai.xi# WARNING: this can be a highly destructive operation.  Use with caution.
5*53ee8cc1Swenshuai.xi#
6*53ee8cc1Swenshuai.xi
7*53ee8cc1Swenshuai.xiuse bytes;
8*53ee8cc1Swenshuai.xiuse File::Basename;
9*53ee8cc1Swenshuai.xi
10*53ee8cc1Swenshuai.xi# Default options
11*53ee8cc1Swenshuai.xi$max_width = 79;
12*53ee8cc1Swenshuai.xi
# Normalize whitespace runs that end in a tab.  Spaces immediately
# followed by a tab are dropped and replaced by however many tabs are
# needed to reach the same tab stop (tab stops every 8 columns); spaces
# followed by anything else are passed through untouched.
#
# Takes the text to clean and returns the cleaned copy.
sub clean_space_tabs($)
{
    no bytes;			# Columns are counted in characters, not bytes

    my ($text) = @_;
    my $cleaned = '';
    my $column  = 0;		# Output column reached so far
    my $pending = 0;		# Spaces seen but not yet emitted

    foreach my $ch (split(//, $text)) {
	if ($ch eq ' ') {
	    # Hold the space back until we know what follows it
	    $pending++;
	    next;
	}

	if ($ch eq "\t") {
	    # The held-back spaces are absorbed into the tab(s): emit one
	    # tab per tab stop crossed between the old and new column.
	    my $target = ($column + $pending + 8) & ~7;
	    $cleaned .= "\t" x (($target >> 3) - ($column >> 3));
	    $column  = $target;
	    $pending = 0;
	    next;
	}

	# Any other character: the held-back spaces are real, flush them.
	$cleaned .= (' ' x $pending) . $ch;
	if ($ch eq "\n" || $ch eq "\r") {
	    $column = 0;		# Line terminators restart the column count
	} else {
	    $column += $pending + 1;
	}
	$pending = 0;
    }

    # Spaces at the very end of the input are kept as they are
    return $cleaned . (' ' x $pending);
}
52*53ee8cc1Swenshuai.xi
# Return the display width of a string: the column reached by its widest
# line, with tabs advancing to the next multiple of 8 and every other
# character except "\n" counting as one column.
sub strwidth($) {
    no bytes;			# Columns are counted in characters, not bytes

    my ($text) = @_;
    my $widest = 0;		# Widest completed line so far
    my $column = 0;		# Column reached on the current line

    foreach my $ch (split(//, $text)) {
	if ($ch eq "\n") {
	    # End of a line: record its width and start over
	    $widest = $column if ($column > $widest);
	    $column = 0;
	} elsif ($ch eq "\t") {
	    # Jump to the next tab stop
	    $column = ($column | 7) + 1;
	} else {
	    $column++;
	}
    }

    # Account for a final line that has no terminating newline
    return ($column > $widest) ? $column : $widest;
}
77*53ee8cc1Swenshuai.xi
# Program name as invoked; used as the prefix of every diagnostic.
$name = basename($0);

# Files to clean, in command-line order.
@files = ();

# Parse the command line.  "-width N" (or "-w N") sets $max_width, any
# other argument starting with "-" prints the usage message and exits
# with status 1, and everything else is taken as a file name.
while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
	if ($arg eq '-width' || $arg eq '-w') {
	    my $width = shift(@ARGV);
	    if (!defined($width)) {
		# The option was the last argument and has no value.
		# Treating that as 0 would silently turn the width check
		# off, so report it as a usage error instead.
		print STDERR "Usage: $name [-width #] files...\n";
		exit 1;
	    }
	    $max_width = $width+0;
	} else {
	    print STDERR "Usage: $name [-width #] files...\n";
	    exit 1;
	}
    } else {
	push(@files, $arg);
    }
}
94*53ee8cc1Swenshuai.xi
# Clean every file named on the command line, in place.  For each file:
# skip it if it is not a regular file, cannot be opened read/write, or
# contains a zero byte; otherwise strip trailing spaces, tabs and carriage
# returns from each line, normalize space-before-tab sequences, drop blank
# lines at the end of the file, and report lines wider than $max_width.
# The file is rewritten only when its contents actually change.
FILE:
foreach my $f ( @files ) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
	print STDERR "$f: not a file\n";
	next FILE;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
	print STDERR "$name: Cannot open file: $f: $!\n";
	next FILE;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary = 0;
    my $data;

    while (1) {
	my $got = read($fh, $data, 65536);
	if (!defined($got)) {
	    # A read error must not be mistaken for end of file: leave
	    # the file alone rather than clean a partial view of it.
	    print STDERR "$name: Cannot read file: $f: $!\n";
	    close($fh);
	    next FILE;
	}
	last if ($got == 0);
	if ($data =~ /\0/) {
	    $is_binary = 1;
	    last;
	}
    }

    if ($is_binary) {
	print STDERR "$name: $f: binary file\n";
	close($fh);		# Do not leave the handle open when skipping
	next FILE;
    }

    if (!seek($fh, 0, 0)) {
	print STDERR "$name: Cannot rewind file: $f: $!\n";
	close($fh);
	next FILE;
    }

    my @blanks  = ();		# Blank lines held until a non-blank line follows
    my @lines   = ();		# Cleaned lines to be written back
    my $lineno  = 0;
    my $changed = 0;		# Set as soon as the output differs from the input

    while ( defined(my $orig = <$fh>) ) {
	$lineno++;
	my $line = $orig;
	$line =~ s/[ \t\r]*$//;		# Remove trailing spaces
	$line = clean_space_tabs($line);

	# Compare contents, not lengths: a space-tab sequence can be
	# rewritten into the same number of bytes (e.g. " \t" ending at
	# column 8 becomes "\t\t"), which a byte count would miss.
	$changed = 1 if ($line ne $orig);

	if ( $line eq "\n" ) {
	    push(@blanks, $line);
	} else {
	    push(@lines, @blanks, $line);
	    @blanks = ();
	}

	my $l_width = strwidth($line);
	if ($max_width && $l_width > $max_width) {
	    print STDERR
		"$f:$lineno: line exceeds $max_width characters ($l_width)\n";
	}
    }

    # Any blanks at the end of the file are discarded
    $changed = 1 if (@blanks);

    if ($changed) {
	# Only write to the file if changed
	seek($fh, 0, 0)
	    or die "$name: Failed to rewind modified file: $f: $!\n";
	print {$fh} @lines
	    or die "$name: Failed to write modified file: $f: $!\n";

	# tell() reports failure as -1, not undef
	my $where = tell($fh);
	if ( $where < 0 || !truncate($fh, $where) ) {
	    die "$name: Failed to truncate modified file: $f: $!\n";
	}
    }

    # Buffered write errors only surface here, so the result is checked
    close($fh)
	or die "$name: Failed to close file: $f: $!\n";
}
177