Here is a Perl script that I use to automatically replace my duplicate files with hard links.
user@host:~$ SEARCH=/data; find $SEARCH -not -empty -type f -printf %s\\n | sort -rn | uniq -d | xargs -I{} -n1 find $SEARCH -type f -size {}c -print0 | xargs -0 md5sum | sort | uniq -w32 --all-repeated=separate > duplicated_files
user@host:~$ sed -i 's/[0-9a-f]*\s\s//' duplicated_files
user@host:~$ deduphard.pl duplicated_files
#!/usr/bin/perl
# Role : Perl script which creates hard links between duplicated files
# Author : http://shebangthedolphins.net/
# Instructions :
# - create a file of duplicated files : SEARCH=/data; find $SEARCH -not -empty -type f -printf %s\\n | sort -rn | uniq -d | xargs -I{} -n1 find $SEARCH -type f -size {}c -print0 | xargs -0 md5sum | sort | uniq -w32 --all-repeated=separate > duplicated_files
# - format the file by removing the MD5 hashes, so that only the file paths remain
# * sed -i 's/[0-9a-f]*\s\s//' fichier
# 1.0 first version
use strict;
use warnings;
# Main script: read the list of duplicated files given as first argument and
# hard-link every file of a group onto the first file of that group.
#
# Expected input format: one path per line, groups of identical files
# separated by a void line (as produced by `uniq --all-repeated=separate`
# once the hashes have been stripped).
my $fichier = $ARGV[0];
defined $fichier or die "Usage: $0 duplicated_files\n";

my $firstline;     # reference path of the current group (source of the links)
my $temoin = 0;    # 1 once the reference path of the current group is known

open(my $fh, '<', $fichier) or die "Error: $!";    # open file
while (my $ligne = <$fh>) {                         # read file line by line
    # A void line closes the current group; the next path starts a new one.
    if (lignevide($ligne)) {
        $temoin = 0;
        next;
    }

    # chomp (not chop): only strip a real trailing newline, so a last line
    # without newline does not lose the final character of its path.
    chomp $ligne;

    # First path of a group: remember it as the reference.
    if (!$temoin) {
        $firstline = $ligne;
        $temoin    = 1;
        next;
    }

    # Any further path of the group is replaced by a hard link to the reference.
    my $source      = $firstline;
    my $destination = $ligne;
    print "hardlink : ln $source $destination\n\n";    # print information

    # List-form system() bypasses the shell, so paths containing quotes,
    # dollars or backticks are passed to ln untouched.
    # -f : delete destination files; -i ask before delete; -- ends options.
    system('ln', '-fi', '--', $source, $destination) == 0
        or warn "Warning: ln failed for '$destination' (status $?)\n";
}
close $fh;
# Tell whether a line is void.
# Takes the line as its only argument and returns 1 when the line is empty
# or holds nothing but its trailing newline, 0 otherwise.
sub lignevide {
    my ($candidate) = @_;
    return 1 if $candidate =~ m/^$/;
    return 0;
}
Contact :