Here is a simple utility program to help locate duplicate cell data in
a Spreadsheet::WriteExcel file.
#!/usr/bin/perl -w
#############################################################################
#
#
find_duplicate_cells.pl.
#
# Simple Perl utility to find cells with duplicate data in an Excel
file.
#
# reverse('©'), May 2008, John McNamara,
jmcn...@cpan.org
#
use strict;
use Spreadsheet::ParseExcel;
use Spreadsheet::WriteExcel::Utility 'xl_rowcol_to_cell';
my %cell_data;
my $duplicate_found;
main();
#############################################################################
#
# Parse and Excel file and look for duplicates cell data.
#
sub main {
# Check for an input file and print a simple usage.
my $file_name = $ARGV[0];
if ( !$file_name ) {
die "Usage: perl $0 file.xls\n";
}
# Create the ParseExcel parser with a different cell handler.
my $parse_excel = Spreadsheet::ParseExcel->new(
CellHandler => \&cell_handler,
NotSetCell => 1
);
# Parse the Excel file.
my $workbook = $parse_excel->Parse($file_name);
# Print a message if the file didn't contain duplicates.
print "No duplicates cells found.\n" if !$duplicate_found;
}
#############################################################################
#
# Callback function to replace ParseExcel's default cell handler.
#
sub cell_handler {
my $workbook = $_[0];
my $sheet_index = $_[1];
my $row = $_[2];
my $col = $_[3];
my $cell = $_[4];
# Convert the row and col to A1 notation.
my $cell_index = xl_rowcol_to_cell( $row, $col );
# Get the worksheet name.
my $sheet_name = $workbook->{Worksheet}->[$sheet_index]->{Name};
# Check if data already exists for this cell.
if ( exists $cell_data{$sheet_name}->{$row}
&& exists $cell_data{$sheet_name}->{$row}->{$col} )
{
# Print a header. Once only.
print "Duplicates cells found:\n" if !$duplicate_found++;
# Check which data has been overwritten.
my $existing_data = $cell_data{$sheet_name}->{$row}->{$col};
# Let the user know which cell data is duplicated.
printf "'%s'!%s = %s\n", $sheet_name, $cell_index,
$existing_data;
printf "'%s'!%s = %s\n", $sheet_name, $cell_index, $cell-
>Value();
}
else {
# Keep track of cells that have already been seen.
$cell_data{$sheet_name}->{$row}->{$col} = $cell->Value();
}
}
__END__