#!/usr/bin/env perl use strict; use warnings; use Digest::MD5; use File::Compare; # scope for our class variable { # this code tickles a lot of what appear to be bugs in use strict refs #no strict 'refs'; # class variable our $prefix_length = 16; package lazy_file_comparator; sub new { my $class = shift; my $filename = shift; if (@_) { die "too many arguments to lazy_file_comparator->new"; } my $self = { filename => $filename, device => undef, inode => undef, size => undef, prefix => undef, hash => undef }; # foo bless $self, $class; return $self; } use overload ('<=>' => \&threeway_compare); sub threeway_compare { # my $ind = 0; # foreach (@_) # { # printf "%d %d %s\n", $ind, length $_, $_; # $ind++; # } my $this = shift; #print $this; my $that = shift; #print $that; # my $inverted = shift; # print $inverted; # if (@_) # { # die "Too many arguments to threeway_compare"; # print @_; # } # if ($inverted) # { # my $temp = $this; # $this = $that; # $that = $temp; # } unless (defined $this->{device}) { my @this_statbuf = stat $this->{filename}; $this->{device} = $this_statbuf[0]; $this->{inode} = $this_statbuf[1]; $this->{size} = $this_statbuf[7]; } unless (defined $that->{device}) { my @that_statbuf = stat $that->{filename}; $that->{device} = $that_statbuf[0]; $that->{inode} = $that_statbuf[1]; $that->{size} = $that_statbuf[7]; } # A file is supposed to be identical if the device and inode numbers are the same # Supposedly there's a webdav client that breaks this assumption though. # Cygwin too. if (($this->{device} == $that->{device}) and ($this->{inode} == $that->{inode})) { return 0; } my $size_result = $this->{size} <=> $that->{size}; if ($size_result < 0) { #print 'less on size'; return -1; } if ($size_result > 0) { #print 'greater on size'; return 1; } unless (defined $this->{prefix}) { open this_file, $this->{filename} or die "can't open datafile: $!"; binmode this_file; read this_file, $this->{prefix}, $prefix_length; close this_file; } unless (defined $that->{prefix}) { open that_file, $that->{filename} or die "can't open datafile: $!"; binmode that_file; read that_file, $that->{prefix}, $prefix_length; close that_file; } my $prefix_result = $this->{prefix} cmp $that->{prefix}; if ($prefix_result < 0) { # print 'less on prefix'; return -1; } if ($prefix_result > 0) { # print 'greater on prefix'; return 1; } unless (defined $this->{hash}) { my $this_ctx = Digest::MD5->new; my $this_md5_file; open $this_md5_file, $this->{filename} or die "can't open datafile: $!"; $this_ctx->addfile($this_md5_file); $this->{hash} = $this_ctx->digest; close $this_md5_file; } unless (defined $that->{hash}) { my $that_ctx = Digest::MD5->new; my $that_md5_file; open $that_md5_file, $that->{filename} or die "can't open datafile: $!"; $that_ctx->addfile($that_md5_file); $that->{hash} = $that_ctx->digest; close $that_md5_file; } my $hash_result = $this->{hash} cmp $that->{hash}; if ($hash_result < 0) { # print 'less on hash'; return -1; } if ($hash_result > 0) { # print 'greater on hash'; return 1; } #print $this->{filename}, $that->{filename}; my $full_compare_result = File::Compare::compare($this->{filename}, $that->{filename}); # if ($full_compare_result == -1) # { # print 'less on full compare'; # } # if ($full_compare_result == 1) # { # print 'greater on full compare'; # } # if ($full_compare_result == 0) # { # print 'equal :)'; # } # printf "returning full_compare_result %s", $full_compare_result; return $full_compare_result; } use overload ('""' => \&stringify); sub stringify { my $self = shift; # print 'stringifying'; return $self->{filename}; } } package main; # my $f1 = lazy_file_comparator->new('/tmp/t'); # my $f2 = lazy_file_comparator->new('/tmp/p'); # print "hi\n"; # printf "%d\n", ($f1 == $f2); # print "bye\n"; # printf "%s\n", $f1; # printf "%s\n", $f2; my @unsorted; foreach () { chomp; push @unsorted, lazy_file_comparator->new($_); } #print @unsorted; #sleep(60); my @sorted = sort { lazy_file_comparator::threeway_compare($a, $b) } @unsorted; #print @sorted; if (@sorted) { my $prior = $sorted[0]; print $prior; my $ind; for ($ind=1; $ind<@sorted; $ind++) # foreach $ind ([1..@sorted]) { # print "(Comparing $ind $sorted[$ind] and $prior)"; if ($sorted[$ind] == $prior) { # printf "('if' %s == %s)", $sorted[$ind], $prior; printf " %s", $sorted[$ind]; } else { # printf "('else' %s != %s)", $sorted[$ind], $prior; printf "\n%s", $sorted[$ind]; } $prior = $sorted[$ind]; } printf "\n"; }