1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
|
#! /usr/bin/perl
# replace identical files with hard links
use Getopt::Long;
use File::stat;
# MD5 digest of zero-length input; used to recognise empty files.
$md5_0 = "d41d8cd98f00b204e9800998ecf8427e";

# Command-line switches, kept as package globals for the rest of the script:
#   --link   switch on the hard-linking pass (off by default)
#   --quiet  suppress the per-group listing
$opt_link = 0;
$opt_quiet = 0;

# GetOptions returns false after reporting an unknown or malformed option.
# Stop there instead of running in a mode the user did not ask for.
GetOptions(
    link => \$opt_link,
    quiet => \$opt_quiet,
) or die "usage: $0 [--link] [--quiet] dir...\n";
# Walk every directory named on the command line and group regular files by
# MD5 digest: %md5 maps digest => [ path, ... ].
#
# find and md5sum are started through a list-form pipe open, so directory
# names are passed as plain arguments and never parsed by a shell.
for my $dir (@ARGV) {
    # A leading "-" would be read by find as an option or expression.
    my $start = $dir =~ /^-/ ? "./$dir" : $dir;

    my $sums;
    unless (open($sums, '-|',
                 'find', $start, '-type', 'f', '-exec', 'md5sum', '{}', '+')) {
        warn "cannot run find on \"$dir\": $!\n";
        next;
    }

    while (my $line = <$sums>) {
        chomp $line;

        # md5sum prints "<digest>  <name>" ("<digest> *<name>" in binary
        # mode). A leading backslash marks a name whose backslashes,
        # newlines or carriage returns have been escaped.
        next unless $line =~ /^(\\?)([0-9a-f]{32}) [ *](.*)$/;
        my ($escaped, $digest, $file) = ($1, $2, $3);

        if ($escaped) {
            $file =~ s/\\([\\nr])/$1 eq 'n' ? "\n" : $1 eq 'r' ? "\r" : "\\"/ge;
        }
        push @{$md5{$digest}}, $file;
    }

    # A non-zero exit (unreadable files or directories) is not fatal; the
    # tools have already printed their own diagnostics.
    close($sums) or warn "find/md5sum reported problems under \"$dir\"\n";
}
# Report every digest shared by more than one path and, with --link, collapse
# each such group onto the inode of its first path.
#
# Reads:  %md5, $md5_0, $opt_quiet, $opt_link
# Writes: $total_size - sum over groups of (distinct inodes - 1) * file size
$total_size = 0;
$empty_files = 0;
for my $digest (sort keys %md5) {
    my @paths = @{$md5{$digest}};
    next unless @paths > 1;

    my $size = 0;
    my $cnt = 0;        # number of distinct inodes in this group
    my (%seen, %inode_of, @buf);
    for my $path (@paths) {
        my $sb = stat $path;
        die "oops: stat failed on \"$path\"\n" unless defined $sb;
        $size = $sb->size;

        # Inode numbers are only unique per device, so key on both.
        my $id = $sb->dev . ':' . $sb->ino;
        $inode_of{$path} = $id;
        $cnt++ unless $seen{$id}++;

        push @buf, sprintf(" %04o %04d:%04d %5d %6d \"%s\"\n",
            $sb->mode & 07777, $sb->uid, $sb->gid, $sb->ino, $sb->size, $path);
    }

    if (!$opt_quiet) {
        printf "%s: %d (%d)\n", $digest, ($cnt - 1) * $size, $cnt - 1;
        print @buf;
    }
    $total_size += ($cnt - 1) * $size;

    # Empty files are reported but never linked together.
    next unless $opt_link && $digest ne $md5_0;

    my $ref = $paths[0];
    for my $path (@paths[1 .. $#paths]) {
        # Already a name for the reference inode (or the very same path
        # listed twice): nothing to do. Skipping also matters because
        # rename() between two names of one file is a successful no-op and
        # would leave the temporary link behind.
        next if $inode_of{$path} eq $inode_of{$ref};

        # Create the new link under a temporary name first and only then
        # rename it over the duplicate, so a failing link() (different
        # filesystem, permissions, link limit) never costs us the file.
        my $tmp = "$path.hardlink.$$";
        unless (link $ref, $tmp) {
            warn "cannot link \"$ref\" to \"$tmp\": $!\n";
            next;
        }
        unless (rename $tmp, $path) {
            warn "cannot replace \"$path\": $!\n";
            unlink $tmp;
        }
    }
}
# Closing summary: count the paths recorded under the empty-input digest,
# then print the accumulated totals.
my $empties = $md5{$md5_0};
$empty_files = $empties ? scalar(@$empties) : 0;
unless ($opt_quiet) {
    print "\n";
}
printf "total saved size: %d\n", $total_size;
printf " empty files: %d\n", $empty_files;
|