#!/usr/bin/perl
use strict;
use warnings;
####### Read and validate the command-line input
# Usage: <script> <protein_file> <name>
#   $file - path of the protein file that is opened and searched further down
#   $name - the string to look for inside the protein sequences
my ($file, $name) = @ARGV;
# Both arguments are required. Without this check a missing or empty argument
# only shows up later as "uninitialized value" warnings and a failed open.
die "Usage: $0 <protein_file> <name>\n"
    if (@ARGV < 2 || !length($name));
#### The name is rejected when it contains one of the letters listed in the
#### message below (checked case-insensitively).
die "The name cannot contain non protein letter which are:B, J, O, U, X, or Z," if ($name =~ m/[bjouxz]/i);
print "input is: $file\t $name\n";
#############################
# Load the protein file: read every line (trailing newline removed) into
# @line, then close the file.
# The three-argument open keeps the file name from being interpreted as an
# open mode or a pipe, and $! tells the user why the open failed.
open(my $in, '<', $file) or die "Couldn't open the file '$file': $!";
chomp(my @line = <$in>);
close $in;
# Remember the length of the name and upper-case it.
# NOTE(review): presumably the sequences in the file are upper-case, which is
# why the name is normalised here - confirm against the input data.
my $name_length = length($name);
$name =~ tr/a-z/A-Z/;
print "\$name_length is $name_length\n";
# The first line of the input is not part of the data (the original author
# noted that index 0 of the array is empty), so drop it before pairing.
shift(@line);
# Turn the remaining lines into the hash %name2seq: every even-positioned
# line becomes a key and the line after it becomes its value.
# NOTE(review): this assumes the file strictly alternates one header line and
# one sequence line; an odd number of lines leaves the last key without a value.
my %name2seq = @line;
############################ Look for the name in every sequence
# For each entry of %name2seq, report the entry when its sequence contains
# $name as a substring. If no sequence matches, print a summary that includes
# how many sequences were examined.
my $found = 0;    # becomes 1 as soon as any sequence contains $name
my $round = 0;    # number of sequences examined
foreach my $key_seq_name (keys %name2seq) {
    $round++;
    my $sequence = $name2seq{$key_seq_name};
    # A key without a value (odd number of input lines) has nothing to search.
    next unless defined $sequence;
    # index() tests every offset, including offset 0 and the last possible
    # window. The previous hand-written loop left after its first iteration
    # and therefore only ever compared the substring starting at offset 1.
    next if index($sequence, $name) < 0;
    # Report only the first whitespace-separated word of the header line;
    # fall back to the whole key when it contains no word at all.
    my ($short_name) = split(' ', $key_seq_name);
    $short_name = $key_seq_name unless defined $short_name;
    print "$short_name contains $name\n";
    $found = 1;
}
if ($found == 0) {
    print "$name is not represented in this protein sequence\n";
    print "This program checked $round sequences\n";
}