#!/usr/bin/perl

# Date created: 8-11-04
# Author:	Lukas Habegger
# Date Update  7-21-05 by bobby huether >>>> Added the code to make a clustal output file into a workfile line 5126 option 20
# 	7-26-05 fixed option 20, to actually work
#	5-30-06  Added feature to input an ACC list (a DORF list, for instance) and output the HSSP alignment of those sequences


#Covar_program_mod.pl - The program manipulates aligned sequence data.  
#    Copyright (C) 2006  Lukas Habegger
#	email: bhuether@hwi.buffalo.edu

#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License along
#    with this program; if not, write to the Free Software Foundation, Inc.,
#    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.



# Print the program banner and GPL notice once at start-up.
#
# A single-quoted here-document is used so that nothing in the text is
# interpolated.  In a double-quoted string the "@hwi" inside the e-mail
# address is taken as an (empty) array and silently vanishes from the
# output, because "use strict" is not yet in effect at this point.
print <<'END_BANNER';

	Covar_program_mod.pl - The program manipulates aligned sequence data.  
    Copyright (C) 2006  Lukas Habegger
	email: bhuether@hwi.buffalo.edu

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
END_BANNER


use POSIX qw(ceil floor);
use FileHandle;
STDOUT->autoflush(1);
use LWP::Simple; 
use strict;

###########################################################################
# Global Variables
###########################################################################

# File-level state shared by the menu routines below.
my $menu_selection;	# menu choice most recently read by intro()
my $mday;		# date fields, refreshed by main() from localtime()
my $mon;
my $year;
my $date;		# human-readable timestamp, refreshed by main()
my $work_dir_name;	# directory the menu options read from and write to
my $dir_name;		# directory the program was started in ("main directory")


###########################################################################
# Main Program
###########################################################################

# The start-up directory doubles as the initial working directory.
# chomp (not chop) so that only a trailing newline is ever removed.
$dir_name = `pwd`;
chomp $dir_name;
$work_dir_name = $dir_name;


main();

###########################################################################
# Main Function
###########################################################################

# Show the main menu and run the handler for the option the user picked.
#
# Refreshes the file-level date variables, calls intro() (which stores the
# choice in $menu_selection) and dispatches on that choice.  An unknown
# choice prints an error, waits for <ENTER> and shows the menu again.
# Takes no arguments of its own and has no meaningful return value.
sub main
{
	# localtime() in list context returns
	# (sec, min, hour, mday, mon, year, ...), so take elements 3..5;
	# a plain three-element assignment would store sec/min/hour instead.
	# As returned by localtime, $mon is 0-based and $year counts from 1900.
	($mday,$mon,$year) = (localtime(time))[3,4,5];
	$date = localtime;

	&intro;

	# Menu choice => handler routine.
	my %handler_for = (
		"1"  => \&workdir,
		"2"  => \&option1,
		"10" => \&option4,
		"15" => \&exclude,
		"16" => \&option5,
		"17" => \&option6,
		"18" => \&option7,
		"20" => \&option8,
		"h"  => \&help_menu,
		"H"  => \&help_menu,
	);

	# All of these choices are served by option3, which inspects
	# $menu_selection itself to decide what to do.
	foreach my $choice (qw(3 4 5 6 7 8 9 11 12 13 19 21))
	{
		$handler_for{$choice} = \&option3;
	}

	if (my $handler = $handler_for{$menu_selection})
	{
		# "&$handler;" without parentheses lets the callee see the
		# current @_, exactly like the "&name;" calls used throughout
		# this file.
		&$handler;
	}
	elsif ($menu_selection eq "14")
	{
		# Option 14 is offered in the menu but nothing is wired to it here.
	}
	elsif ($menu_selection eq "e" || $menu_selection eq "E")
	{
		print "\nThanx for using this great program!!! \nEXITING...\n\n";
	}
	else
	{
		print "\n*** Invalid Selection ***\n\n";
		print "\n> Press <ENTER>";
		<STDIN>;
		&main;
	}
}



# Clear the screen, print the main menu and read the user's choice into the
# file-level variable $menu_selection (without its trailing newline).
#
# When standard input is exhausted the choice is set to "E" (exit), so that
# main() terminates instead of re-prompting forever on an undefined value.
# Takes no arguments; the result is delivered through $menu_selection.
sub intro
{
	print `clear`;
	print
		"\n    #####   MAIN MENU   #####\n\n",
		"(H)  To get help\n",
		"(1)  To specify the working directory\n",
		"(2)  To create a workfile from HSSP (requires PDB ID)\n",
		"(3)  To generate a table/fingerprint/name list/species output/CLUSTALW output\n",
		"(4)  To isolate a single AA at a certain location from workfile \n",
		"(5)  To isolate proteins by name from workfile \n",
		"(6)  To exclude a single AA at a certain location from workfile\n",
		"(7)  To exculde an AA entirely from workfile\n",
		"(8)  To isolate proteins with a certain number of one AA from workfile\n",
		"(9)  To get an AA frequency from workfile\n",
		"(10) To compare two tables\n",
		"(11) To generate a fingerprint analysis table from workfile\n",
		"(12) To isolate proteins by using fingerprint restrictions from workfile\n",
		"(13) To generate an acession number list from workfile\n",
		"(14) To compare to accession number lists\n",
		"(15) To exclude a subset of proteins from workfile\n",
		"(16) To do a full text search in Swiss-Prot and TrEMBL, followed by CLUSTALW alignment\n",
		"(17) To do a BLAST followed by CLUSTALW alignment\n",
		"(18) To compare several tables based on the fingerprint\n",
		"(19) To generate species specific workfiles\n",
		"(20) CLUSTALW file to .workfile\n",
		"*****************************\n\t Codon Covar Converters\n***************************\n",
		"(21) OUTPUT a HSSP alignment of INPUTed ACC file (DORF ACC)\n",
		"(E)  To exit\n",
		"\n\nPlease select option \n>";

	$menu_selection = <STDIN>;

	if (!defined $menu_selection)
	{
		# End of input: behave as if the user had asked to exit.
		$menu_selection = "E";
		return;
	}

	# chomp removes only a line terminator; chop would also eat the last
	# real character of an input line that has no trailing newline.
	chomp $menu_selection;
}


# Working-directory sub-menu.
#
#   (1) choose one of the sub-directories of the main directory ($dir_name)
#       as the working directory ($work_dir_name); an existing temp.log in
#       the chosen directory is deleted
#   (2) list the current working directory; when it differs from the main
#       directory the entries are numbered and one can be opened
#   (3) list the main directory
#   (4) return to the main menu
#
# Every branch ends by showing a menu again (recursive call), so the routine
# only returns once the nested menus have returned.  Reads and writes the
# file-level variables $dir_name and $work_dir_name.  Takes no arguments.
sub workdir
{
	my $choice;
	my $file_selection;
	my @dir_array;
	my @pdb_directories;
	my $dir_selection;
	my $k;

	print "\n\n(1) To specify the working directory\n";
	print "(2) To view the CURRENT directory\n";
	print "(3) To view the MAIN directory\n";
	print "(4) To go back to MAIN MENU\n";
	print "\n\nPlease select option\n>";
	$choice = <STDIN>;

	# End of input: stop here instead of re-prompting forever.
	return if !defined $choice;
	chomp $choice;

	if ($choice eq "3")
	{
		print `clear`;
		print "\nPath of main directory: $dir_name\n\n";
		@dir_array = _workdir_entries($dir_name);

		foreach (@dir_array)
		{
			print "  $_\n";
		}
		&workdir;
	}
	elsif ($choice eq "2")
	{
		print `clear`;
		print "\nPath of current directory: $work_dir_name\n\n";
		@dir_array = _workdir_entries($work_dir_name, "temp.log");

		# The two cases are mutually exclusive.  The nested menu call
		# below may change $work_dir_name, so an independent second
		# test would run against a listing that is no longer current.
		if ($work_dir_name eq $dir_name)
		{
			foreach (@dir_array)
			{
				print "  $_\n";
			}
			&workdir;
			print "\n\n";
		}
		else
		{
			print "\n\n";
			print `clear`;
			print "\nPath of current directory: $work_dir_name\n\n";

			# Number the entries starting at 1; $k ends up one past
			# the last valid number.
			$k = 1;
			foreach (@dir_array)
			{
				printf "%6s   %s\n", "($k)", $_;
				$k++;
			}
			print "\n\nSelect file to open or press (B) to go back\n>";
			$file_selection = <STDIN>;
			$file_selection = "" if !defined $file_selection;
			chomp $file_selection;
			$file_selection = uc($file_selection);

			if ($file_selection eq "B")
			{
				&workdir;
			}
			elsif ($file_selection =~ /\d/ && $file_selection >= 1 && $file_selection < $k)
			{
				# NOTE(review): "j" is an external command that is not
				# defined in this file - presumably a viewer/editor;
				# confirm.  The path is passed through the shell
				# unquoted.
				`j $work_dir_name/$dir_array[$file_selection - 1]`;
				&workdir;
			}
			else
			{
				print "\n*** Invalid Selection ***\n\n"; print "\n> Press <ENTER>"; <STDIN>; &workdir;
			}
		}
	}
	elsif ($choice eq "1")
	{
		@dir_array = _workdir_entries($dir_name);

		$k = 1;
		print `clear`;
		print "\n";
		foreach (@dir_array)
		{
			# Test the entry inside the directory it was listed from
			# rather than relative to the process's current directory.
			if (-d "$dir_name/$_")
			{
				printf "%6s   %s\n", "($k)", $_;
				$k++;
				push @pdb_directories, $_;
			}
		}
		print "\n\nSelect working directory\n>";
		$dir_selection = <STDIN>;
		$dir_selection = "" if !defined $dir_selection;
		chomp $dir_selection;

		if ($dir_selection >= $k || $dir_selection < 1)
		{
			print "\n*** Invalid Selection ***\n\n"; print "\n> Press <ENTER>"; <STDIN>; &workdir;
		}
		else
		{
			$work_dir_name = "$dir_name/$pdb_directories[$dir_selection - 1]";
			print "\n\nDirectory set to: $work_dir_name\n";

			# Drop a temp.log left in the newly selected directory.
			if (grep { $_ eq "temp.log" } _workdir_entries($work_dir_name))
			{
				unlink "$work_dir_name/temp.log";
			}
			&workdir;
		}
	}
	elsif ($choice eq "4")
	{
		&main;
	}
	else
	{
		print "\n*** Invalid Selection ***\n\n"; print "\n> Press <ENTER>"; <STDIN>; &workdir;
	}
}

# Return the names found in directory $path, in readdir() order, without
# "." and ".." and without any additional names given in @skip.
# Dies with "can't open directory!" when $path cannot be opened.
sub _workdir_entries
{
	my ($path, @skip) = @_;
	my %skip_name = map { $_ => 1 } (".", "..", @skip);

	opendir(my $dh, $path) || die "can't open directory!";
	# Filter by name: readdir() does not promise that "." and ".." come first.
	my @entries = grep { !$skip_name{$_} } readdir($dh);
	closedir($dh);

	return @entries;
}


# Menu option 2: download the HSSP entry for a PDB ID and turn it into a
# workfile.
#
# Steps:
#   1. ask for the PDB ID and refuse it if "<ID in upper case>.dir" already
#      exists in the main directory ($dir_name)
#   2. fetch the entry with LWP::Simple::get
#   3. create the directory and make it the working directory
#      ($work_dir_name)
#   4. convert the downloaded HTML to plain text and store it as
#      "<id>.hssp" in the new directory
#   5. hand the file name to option2(), which builds the workfile
#
# On invalid input the user is sent back to the main menu, following the
# pattern used elsewhere in this file ("&main; die ...").  The directory is
# only created, and $work_dir_name only changed, after the download has
# succeeded, so a mistyped ID leaves nothing behind.
# Takes no arguments.
sub option1
{
	my $pdb_id;
	my $hssp_file;
	my $tree;
	my $formatter;
	my $hssp_name;
	my $work_dir_extension;
	my $new_work_dir;

	require HTML::TreeBuilder;
	require HTML::FormatText;

	print "\nPlease enter PDB ID \n>";
	$pdb_id = <STDIN>;
	$pdb_id = "" if !defined $pdb_id;
	chomp $pdb_id;

	# The ID becomes part of a directory name, a file name and a URL, so
	# only plain word characters are accepted.
	# NOTE(review): assumes PDB IDs never contain other characters - confirm.
	if ($pdb_id !~ /\A\w+\z/)
	{
		print "\n*** Invalid PDB ID ***\n\n"; print "\n> Press <ENTER>";
		<STDIN>;
		&main;
		die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
	}

	$work_dir_extension = uc($pdb_id) . ".dir";
	$new_work_dir = "$dir_name/$work_dir_extension";

	if (-e $new_work_dir)
	{
		print "\n\n*** $work_dir_extension already exists! ***\n\n";
		print "\n> Press <ENTER>";
		<STDIN>;
		&main;
		die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
	}

	$hssp_file = get "http://srs.ebi.ac.uk/srs6bin/cgi-bin/wgetz?-id+4Flds1DaOiR+-e+[hssp-ID:$pdb_id]";

	# get() returns undef when the request itself fails.
	if (!defined $hssp_file)
	{
		print "\n*** Could not retrieve HSSP entry ***\n\n"; print "\n> Press <ENTER>";
		<STDIN>;
		&main;
		die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
	}

	if ($hssp_file =~ /no entries found/)
	{
		print "\n*** Invalid PDB ID ***\n\n"; print "\n> Press <ENTER>";
		<STDIN>;
		&main;
		die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
	}

	# Built-in mkdir: no shell is involved.
	mkdir($new_work_dir, 0777) || die "can't create directory $new_work_dir: $!";
	$work_dir_name = $new_work_dir;
	print "\nCreating new working directory: $work_dir_name\n";

	# The HTML is parsed from a scratch file in the current directory.
	open(my $temp_fh, ">", "temp_file") || die "can't write temp_file: $!";
	print $temp_fh $hssp_file;
	close($temp_fh) || die "can't write temp_file: $!";

	open(my $hssp_fh, ">", "$work_dir_name/$pdb_id.hssp")
		|| die "can't write $work_dir_name/$pdb_id.hssp: $!";
	$tree = HTML::TreeBuilder->new->parse_file("temp_file");
	$formatter = HTML::FormatText->new(leftmargin => 0, rightmargin => 100);
	print $hssp_fh $formatter->format($tree);
	close($hssp_fh) || die "can't write $work_dir_name/$pdb_id.hssp: $!";
	print "\nDone...\n";
	print "Generating HSSP file... ($pdb_id.hssp)\n";

	unlink "temp_file";

	$hssp_name = "$pdb_id.hssp";
	&option2($hssp_name);
}	

# Build a ".workfile" from an HSSP text file in the working directory.
#
# Argument: name of the HSSP file, relative to $work_dir_name.
#
# Written to $work_dir_name:
#   <pdb name>.workfile - for every alignment that has more than one
#                         word-character residue: a ">name" line followed
#                         by its sequence line; then one "&& <PDB number>"
#                         line per residue position and a final
#                         "<< <number of alignments written>" line
#   temp.log            - contains the name of the workfile just written
#
# If the HSSP file cannot be read or is empty, the user is sent back to the
# main menu.  Finishes by calling end_menu_function(2).
sub option2
{
	my $hssp_temp;
	my $file;
	my @hssp_lines;
	my @pdb_name;
	my @pdb_name_temp;
	my $read;
	my @linearr;
	my $nalign;
	my @chararr;
	my $start;
	my $end;
	my $seqno;
	my $pdbno;
	my $readcol;
	my $col;
	my @aaarray;
	my $r;
	my $c;
	my $seqlength;
	my @nolist;
	my @nolist_temp;
	my $hssp_line_count;
	my @temp_name_A;
	my @temp_name_B;
	my @protein_name_array;
	my $concat_temp;
	my $concat_total;
	my $k;
	my $counter;
	my @work_flag;
	my $work_count;
	my @chain_temp;
	my @chain;
	my $kchain;

	$hssp_temp = shift();

	# Read the whole file directly instead of spawning "cat" through a shell.
	$file = "";
	if (open(my $hssp_fh, "<", "$work_dir_name/$hssp_temp"))
	{
		local $/;	# slurp mode, limited to this block
		$file = <$hssp_fh>;
		close($hssp_fh);
	}
	if (!($file))
	{
		print "\n*** Invalid HSSP File ***\n\n";
		<STDIN>; &main; die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
	}

	@hssp_lines = split (/\n/,$file);

	$read = 0;	# 0 = outside the alignment data, n = inside the n-th ALIGNMENTS section
	$kchain = 0;	# set to 1 once a KCHAIN line has been seen
	# Always two ahead of the index of the line being processed, i.e. the
	# index of the first line after the one that follows the current line.
	$hssp_line_count = 2;

	foreach(@hssp_lines)
	{
		# The text between "PDBID" and "DATE" names the output workfile.
		if ($_ =~ /PDBID/)
		{
			@pdb_name_temp = split(/PDBID\s+/,$_);
			@pdb_name = split(/DATE/,$pdb_name_temp[1]);
		}

		# Gets the names of the proteins
		# ($nalign must already have been read from the NALIGN line.)

		if (/EMBL\/SWISSPROT identifier and alignment statistics/)
		{
			@protein_name_array = ();
			$counter = 1;

			# One line per alignment, starting two lines below this heading.
			for ($r = $hssp_line_count; $r < ($nalign + $hssp_line_count); $r++)
			{
				# Keep what follows the first "d.dd  d.dd" number pair and
				# discard the first nine whitespace-separated fields of it.
				@temp_name_A = split(/\d\.\d\d  \d\.\d\d/,$hssp_lines[$r]);
				@temp_name_B = split(/\s+/,$temp_name_A[1]);

				for ($k = 0; $k < 9; $k++)
				{
					shift @temp_name_B;
				}

				$concat_temp = "";
				$concat_total = "";

				# $k == 0 marks the first remaining field; it is non-zero
				# after that field has been handled.
				$k = 0;

				foreach(@temp_name_B)
				{
					if ($k == 0)
					{
						# First field: wrap in [] and pad with blanks to 11 characters.
						$c = 0;
						$_ = "[$_]";
						$c = length($_);
						for ($k = $c ; $k < 11; $k++)
							{$_ = "$_ ";}
						$concat_total = " $_";
						$k++;
					}
					else
					{
						$concat_temp = $_;
						$concat_total = "$concat_total $concat_temp";
					}
				}

				$protein_name_array[$counter] = $concat_total;
				$counter++;
			}
		}

		chomp;

		$read = 0 if (/SEQUENCE PROFILE AND ENTROPY/);
		$read++ if (/ALIGNMENTS/);

		# Characters after "chain(s) : " are the chain identifiers; only
		# rows whose third field equals the first of them are used below.
		if (/KCHAIN/)
		{
			@chain_temp = split(/chain\(s\) \: /,$_);
			@chain = split(//,$chain_temp[1]);
			$kchain = 1;
		}

		if (/NALIGN  /)
		{
			@linearr = split(/ /,&space($_));
			$nalign = $linearr[1];
		}

		# Every line inside an ALIGNMENTS section (except the heading
		# itself) is treated as one residue row.
		if ($read && !(/ALIGNMENTS/))
		{
			@chararr = split(//,$_);
			@linearr = split(/ /,&space($_));

			if ($linearr[2] eq $chain[0] || $kchain == 0)
			{
				# The n-th section holds alignments (n-1)*70+1 .. n*70,
				# capped at $nalign; their characters start at index 51
				# of the line.
				$start = 1 + ($read - 1) * 70;
				$end = ($start + 69 > $nalign?$nalign:$start + 69);
				$seqno = $linearr[0];
				$pdbno = $linearr[1];

				for ($col = $start,$readcol = 51;$col <= $end;$col++,$readcol++)
				{
					$aaarray[$seqno][$col] = ($chararr[$readcol]?$chararr[$readcol]:" ");
				}

				# The PDB numbering only needs to be collected once.
				if ($read == 1)
				{
					$nolist_temp[$seqno] = $pdbno;
				}
			}
		}

		$hssp_line_count++;
	}

	# Keep the entries that compare numerically unequal to zero; undefined
	# slots and non-numeric text are dropped by this test.
	$seqlength = 0;
	foreach(@nolist_temp)
	{
		if ($_ != 0)
		{
			push @nolist, $_;
			$seqlength++;
		}
	}

	print "\nDone reading...\n";

	for ($r = 1; $r <= $nalign; $r++)
	{
		$work_flag[$r] = 0;
	}

	# Flag every alignment with more than one word-character residue;
	# $k counts the flagged alignments.
	$k = 0;
	for ($r = 1; $r <= $nalign; $r++)
	{
		$work_count = 0;
		for ($c = 1; $c <= $seqlength; $c++)
		{
			if ($aaarray[$c][$r] =~ /\w/)
			{
				$work_count++;
			}
		}
		if ($work_count > 1)
		{
			$work_flag[$r] = 1;
			$k++;
		}
	}

	open(my $workfile_fh, ">", "$work_dir_name/$pdb_name[0].workfile")
		|| die "can't write $work_dir_name/$pdb_name[0].workfile: $!";
	for ($r = 1; $r <= $nalign; $r++)
	{
		if ($work_flag[$r] == 1)
		{
			print $workfile_fh ">$protein_name_array[$r]\n";

			for ($c = 1; $c <= $seqlength; $c++)
			{
				print $workfile_fh "$aaarray[$c][$r]";
			}
			print $workfile_fh "\n";
		}
	}
	print $workfile_fh "\n";

	foreach(@nolist)
	{
		print $workfile_fh "&& $_\n";

	}

	print $workfile_fh "\n";
	print $workfile_fh "<< $k";

	print "Generating workfile... ($pdb_name[0].workfile)\n";
	close($workfile_fh) || die "can't write $work_dir_name/$pdb_name[0].workfile: $!";

	# temp.log records the workfile that was just generated.
	open(my $log_fh, ">", "$work_dir_name/temp.log")
		|| die "can't write $work_dir_name/temp.log: $!";
	print $log_fh "$pdb_name[0].workfile\n";
	close($log_fh) || die "can't write $work_dir_name/temp.log: $!";

	&end_menu_function(2);
}



###########################################################################
#	Space Function
###########################################################################


# Normalise the blanks in a line so it can be split on single spaces.
#
# Input:   the line to clean, as first argument.  When called without any
#          argument the current $_ is used, which is what the routine
#          always did before it looked at its argument.
# Returns: a copy of the line with leading spaces removed and every run of
#          spaces reduced to one space (a trailing run leaves one space).
#          Only the space character is affected; tabs and other whitespace
#          are kept as they are.  An undefined line yields "".
# The caller's variable is never modified.
sub space
{
	my $linein = @_ ? $_[0] : $_;

	return "" if !defined $linein;

	$linein =~ s/\A +//;	# drop leading spaces
	$linein =~ tr/ //s;	# squeeze each remaining run of spaces to one

	return $linein;
}


sub option3
{
	my $work_file;
	my $work_file_name_temp;
	my @work_file_name;
	my @output_name;
	my $output_name_total;
	my @work_lines_temp;
	my @work_lines;
	my $work_line_count;
	my @work_names;
	my @work_chars;
	my @pdb_nums;	
	my $chars_count;
	my @super_array;
	my $read_control;
	my @temp;
	my %aanum;
		undef %aanum;
	my @aacid;
	my $d;
	my $c;
	my $r;
	my $num_proteins;
	my $seq_length;
	my @aacount;
	my @nocc;
	my @compare_aacount;
	my @compare_nocc;
	my @percents;
	my @compare_percents;
	my $page_num;
	my $page_num_total;
	my $output_count;
	my $aa_name_temp;
	my @aa_name;
	my $aa_loc_temp;
	my @aa_loc;
	my @flagged;
	my $flag;
	my $pdb_index_count;
	my $flag_count;
	my $yes_no_table_temp;
	my @yes_no_table;
	my $yes_no_list_temp;
	my @yes_no_list;
	my $yes_no_finger_temp;
	my @yes_no_finger;
	my $yes_no_compare_temp;
	my @yes_no_compare;
	my $yes_no_seq_temp;
	my @yes_no_seq;
	my $yes_no_species_temp;
	my @yes_no_species;
	my $yes_no_clustalw;
	my $chars_out_count_newline;
	my $chars_out_count_space;
	my $length_each_protein;
	my $protein_name_length;
	my $finger_min_temp;
	my @finger_min;
	my $num_of_finger;
	my $isolate_name_temp;
	my @isolate_name;
	my $isolate_eq;
	my $upper_isolate_eq;
	my $freq_treshold_temp;
	my @freq_treshold;
	my @freq_flagged;
	my @freq_nums;
	my $num_freq_total;
	my $exclude_count;
	my $range_threshold_temp;
	my @range_threshold;
	my $upper_range_temp;
	my $lower_range_temp;
	my @upper_range;
	my @lower_range;
	my $num_to_isolate_temp;
	my @num_to_isolate;
	my $range_occurrence_temp;
	my @range_occurrence;
	my $total_count;
	my $dir_name_temp;
	my @dir_name;
	my $dir_entry;
	my @dir_array;
	my $temp_log_file;
	my @temp_log_file_lines;
	my $input_control;
	my $input_control_sup;
	my $num_of_compare;
	my $compare_difference_temp;
	my @compare_difference;
	my @format_compare_outname;
	my $species_sort_temp;
	my @species_sort;
	my $page_increment;
	my $valid_location;
	my $name_include_exclude;
	my @isolation_names;
	my $multiple_names;
	my $multiple_outname;
	my $fingerprint_restriction;
	my $num_clust_segments;
	my $segment_start;
	my $segment_stop;
	my $mod_result;
	my $section_marker;
	my $space_counter;
	my $length_position;
	my $internal_counter;
	my $accession_length;
	my $clust_length_of_line;
	my @temp_access_num_temp;
	my @access_num_temp;
	my @access_num;
	my $line_counter;
	my $accession_counter;
	my $finger_template_name;
	my @temp_name;
		
		
	@aacid = (" ","V","L","I","M","F","W","Y","G","A","P","S","T","C","H","R","K","Q","E","N","D");
	
	for ($d = 0;$d <=20;$d++)
		{$aanum{$aacid[$d]} = $d;}

	$dir_entry = "";
	@dir_array = (); 

	opendir(DIR,$work_dir_name) || die " can't open directory!";

	while(defined($dir_entry = readdir(DIR)))
	{
		push @dir_array, $dir_entry;
	
	} 
	closedir(DIR);

	$input_control = 0;
	
	foreach(@dir_array)
	{
		if ($_ =~ /temp.log/)
		{
			$temp_log_file = `cat $work_dir_name/temp.log`;
			@temp_log_file_lines = split (/\n/,$temp_log_file);
			foreach(@temp_log_file_lines)
			{
				if ($_ =~ /donot/)
				{
					$input_control_sup = 1; 
				}
				if ($_ =~ /workfile/)
				{
					$work_file_name[0] = $_;
					$input_control = 1;
				}
			}
		}
	}

	if ($input_control == 0 || $input_control_sup == 1)
	{	
		print "\nPlease enter workfile\n>";
		$work_file_name_temp = <STDIN>;
		@work_file_name = split(/\n/,$work_file_name_temp);
		if (!($work_file_name[0] =~ /.workfile/))
			{print "\n*** Invalid Workfile ***\n\n"; print "\n> Press <ENTER>"; <STDIN>; &main; die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";}
	}	
		$work_file = `cat $work_dir_name/$work_file_name[0]` or print "\n*** Invalid Workfile ***\n\n";
		if (!($work_file))
			{<STDIN>; &main; die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";}

	
	@work_lines_temp = split(/\n/,$work_file);

	$num_proteins = 0;
	$read_control = 0;

	foreach(@work_lines_temp)
	{
		if ($read_control == 1)
		{
			push @work_lines, $_;
			$num_proteins++;
			$read_control = 0;
		}

		if ($_ =~ />/)
		{
			@temp = split (/\>/, $_);
			push @work_names, $temp[1];
			$read_control++;
		}

		if ($_ =~ /&&/)
		{
			@temp = split (/\&\& /, $_);
			push @pdb_nums, $temp[1];
		}
		
		if ($_ =~ /<</)
		{
			@temp = split (/\<\< /, $_);
			$total_count = $temp[1];
		}
	}

	@output_name = split(/\.workfile/,$work_file_name[0]);

	if ($menu_selection == 3 || $menu_selection == 13)
	{
		$output_name_total = $output_name[0];
	}
	
	if ($menu_selection == 19)
	{
		$yes_no_species[0] = "Y";
		$output_name_total = $output_name[0];		
	}
	
	if ($menu_selection == 4 || $menu_selection == 6 || $menu_selection == 7 || $menu_selection == 8 || $menu_selection == 9)
	{
		print "\nPlease enter AA\n>";
		$aa_name_temp = <STDIN>;
		@aa_name = split(/\n/,$aa_name_temp);
		$aa_name[0] = uc($aa_name[0]);	
	
		if ($aanum{$aa_name[0]} > 0 && $aanum{$aa_name[0]} < 21)
		{
			if ($menu_selection == 4 || $menu_selection == 6)
			{
				print "\nPlease enter AA location\n>";
				$aa_loc_temp = <STDIN>;
			}
		}	
		else
		{
			print "\n*** Invalid AA ***\n\n"; print "\n> Press <ENTER>";
			<STDIN>;
			&main;
			die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
		}
		
		if ($menu_selection == 4 || $menu_selection == 6)
		{
			@aa_loc = split(/\n/,$aa_loc_temp);
			$valid_location = 0;
			foreach(@pdb_nums)
			{
				if($_ == $aa_loc[0])
					{$valid_location++;}
			}

			if ($valid_location == 0)
			{
				print "\n*** Invalid Location ***\n\n"; print "\n> Press <ENTER>";
				<STDIN>;
				&main;
				die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
			}
			if ($menu_selection == 4)
				{$output_name_total = "$output_name[0]_$aa_name[0]$aa_loc[0]";}

			if ($menu_selection == 6)
				{$output_name_total = "$output_name[0]_no.$aa_name[0]$aa_loc[0]";}
		}
		
		if ($menu_selection == 7)
				{$output_name_total = "$output_name[0]_no.$aa_name[0]";}

		if ($menu_selection == 9)
		{
			print "\nWould you like to use a range or threshold (r/t)?\n>";
			$range_threshold_temp = <STDIN>;
			@range_threshold = split(/\n/,$range_threshold_temp);
			$range_threshold[0] = uc($range_threshold[0]);
			if (!($range_threshold[0] =~ /T/ || $range_threshold[0] =~ /R/))
			{
				print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>";
				<STDIN>;
				&main;
				die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
			}

			if ($range_threshold[0] =~ /T/)
			{ 
				print "\nPlease enter frequency treshold\n>";
				$freq_treshold_temp = <STDIN>;
				@freq_treshold = split(/\n/, $freq_treshold_temp);
				if (!($freq_treshold[0] =~ /\d/))
				{
					print "\n*** Invalid Threshold ***\n\n"; print "\n> Press <ENTER>";
					<STDIN>;
					&main;
					die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
				}
			}

			if ($range_threshold[0] =~ /R/)
			{ 
				print "\nPlease enter lower range\n>";
				$lower_range_temp = <STDIN>;
				@lower_range = split(/\n/, $lower_range_temp);
				print "\nPlease enter upper range\n>";
				$upper_range_temp = <STDIN>;
				@upper_range = split(/\n/, $upper_range_temp);
				if (!(($upper_range[0] =~ /\d/ || $lower_range[0] =~ /\d/) && ($upper_range[0] > $lower_range[0])))
				{
					print "\n*** Invalid Range ***\n\n"; print "\n> Press <ENTER>";
					<STDIN>;
					&main;
					die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
				}

			}
			$output_name_total = "$output_name[0]_$aa_name[0]";
		}


		if ($menu_selection == 8)
		{
			print "\nWould you like to enter a range of occurrences or a single occurrence (r/o)?\n>";
			$range_occurrence_temp = <STDIN>;
			@range_occurrence = split(/\n/,$range_occurrence_temp);
			$range_occurrence[0] = uc($range_occurrence[0]);
			if (!($range_occurrence[0] =~ /R/ || $range_occurrence[0] =~ /O/))
			{
				print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>";
				<STDIN>;
				&main;
				die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
				
			}
			if ($range_occurrence[0] =~ /O/)
			{
				print "\nPlease enter number of occurrences\n>";
				$num_to_isolate_temp = <STDIN>;
				@num_to_isolate = split(/\n/, $num_to_isolate_temp);
				if (!($num_to_isolate[0] =~ /\d/))
				{
					print "\n*** Invalid Number ***\n\n"; print "\n> Press <ENTER>";
					<STDIN>;
					&main;
					die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
				}

				$output_name_total = "$output_name[0]_$num_to_isolate[0]$aa_name[0]_s";
			}		
			if ($range_occurrence[0] =~ /R/)
			{
				print "\nPlease enter lower range\n>";
				$lower_range_temp = <STDIN>;
				@lower_range = split(/\n/, $lower_range_temp);
				print "\nPlease enter upper range\n>";
				$upper_range_temp = <STDIN>;
				@upper_range = split(/\n/, $upper_range_temp);
				if (!(($upper_range[0] =~ /\d/ || $lower_range[0] =~ /\d/) && ($upper_range[0] > $lower_range[0])))
				{
					print "\n*** Invalid Range ***\n\n"; print "\n> Press <ENTER>";
					<STDIN>;
					&main;
					die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
					
				}
				$output_name_total = "$output_name[0]_$lower_range[0]-$upper_range[0]$aa_name[0]_s";
			}		
		}
	}

	if ($menu_selection == 5)
	{
		print "\nPlease enter name to be isolated (multiple names seperated by commas and no space)\n>";
		$isolate_name_temp = <STDIN>;
		print "\nShould the entered name be included or excluded in the protein name (i/e)?\n>";
		$name_include_exclude = <STDIN>;
		chop $name_include_exclude;
		$name_include_exclude = uc($name_include_exclude);
		if ($name_include_exclude ne "I" && $name_include_exclude ne "E")
		{
			print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>";
			<STDIN>;
			&main;
			die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
		}
		@isolate_name = split(/\n/,$isolate_name_temp);
	}

	if ($menu_selection == 11 || $menu_selection == 12)
	{
		print "\nPlease enter fingerprint template file\n>";
		$finger_template_name = <STDIN>;
		chop $finger_template_name;
		@temp_name = split(/\_fingerprint\.template/,$finger_template_name);
		
		if ($menu_selection == 12)
		{
			print "\nPlease enter threshold for required number of residues in fingerprint\n>";
			$fingerprint_restriction = <STDIN>;
			chop $fingerprint_restriction;
			$output_name_total = "$output_name[0]_$temp_name[0]_$fingerprint_restriction";
		
			if (!($fingerprint_restriction =~ /\d/))
			{
				print "\n*** Invalid Input ***\n\n"; print "\n> Press <ENTER>";
				<STDIN>;
				&main;
				die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
			}
		}
		else
		{
			$output_name_total = "$output_name[0]_$temp_name[0]_finger";
		}
	}


	if ($menu_selection == 3 || $menu_selection == 4  || $menu_selection == 5 || $menu_selection == 6 || $menu_selection == 7 || $menu_selection == 8 || $menu_selection == 12)
	{
		print "\nWould you like to generate the table (y/n)?\n>";
		$yes_no_table_temp = <STDIN>;
		@yes_no_table = split(/\n/,$yes_no_table_temp);
		$yes_no_table[0] = uc($yes_no_table[0]);
		if ($yes_no_table[0] ne "Y" && $yes_no_table[0] ne "N")
		{
			print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>";
			<STDIN>;
			&main;
			die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
		}
	
		print "\nWould you like to generate the protein name list (y/n)?\n>";
		$yes_no_list_temp = <STDIN>;
		@yes_no_list = split(/\n/,$yes_no_list_temp);
		$yes_no_list[0] = uc($yes_no_list[0]);
		if ($yes_no_list[0] ne "Y" && $yes_no_list[0] ne "N")
		{
			print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>";
			<STDIN>;
			&main;
			die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
		}

		if ($yes_no_list[0] eq "Y")
		{
			print "\nWould you like to generate the sequence of each protein (y/n)?\n>";
			$yes_no_seq_temp = <STDIN>;
			@yes_no_seq = split(/\n/,$yes_no_seq_temp);
			$yes_no_seq[0] = uc($yes_no_seq[0]);
			if ($yes_no_seq[0] ne "Y" && $yes_no_seq[0] ne "N")
			{
				print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>";
				<STDIN>;
				&main;
				die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
			}
		}
		
		print "\nWould you like to generate CLUSTALW output (y/n)?\n>";
		$yes_no_clustalw = <STDIN>;
		chop $yes_no_clustalw;
		$yes_no_clustalw = uc($yes_no_clustalw);
		if ($yes_no_clustalw ne "Y" && $yes_no_clustalw ne "N")
		{
			print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>";
			<STDIN>;
			&main;
			die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
		}

		print "\nWould you like to generate the fingerprint (y/n)?\n>";
		$yes_no_finger_temp = <STDIN>;
		@yes_no_finger = split(/\n/,$yes_no_finger_temp);
		$yes_no_finger[0] = uc($yes_no_finger[0]);
		if ($yes_no_finger[0] ne "Y" && $yes_no_finger[0] ne "N")
		{
			print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>";
			<STDIN>;
			&main;	
			die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
		}

		if ($yes_no_finger[0] eq "Y")
		{
			print "\nPlease enter threshold\n>";
			$finger_min_temp = <STDIN>;
			@finger_min = split(/\n/,$finger_min_temp);
			if ($finger_min[0] < 0 || $finger_min[0] > 100)
			{
				print "\n*** Invalid Threshold ***\n\n"; print "\n> Press <ENTER>";
				<STDIN>;
				&main;
				die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
			}
		}

		print "\nWould you like to generate a species output (y/n)?\n>";
		$yes_no_species_temp = <STDIN>;
		@yes_no_species = split(/\n/,$yes_no_species_temp);
		$yes_no_species[0] = uc($yes_no_species[0]);
		if ($yes_no_species[0] ne "Y" && $yes_no_species[0] ne "N")
		{
			print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>";
			<STDIN>;
			&main;	
			die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
		}
	
		if ($yes_no_species[0] eq "Y")
		{
			print "\nWould you like to sort by name or by frequency (n/f)?\n>";
			$species_sort_temp = <STDIN>;
			@species_sort = split(/\n/,$species_sort_temp);
			$species_sort[0] = uc($species_sort[0]);
			if ($species_sort[0] ne "F" && $species_sort[0] ne "N")
			{
				print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>";
				<STDIN>;
				&main;	
				die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
			}
		}

		if ($menu_selection == 4  || $menu_selection == 5 || $menu_selection == 6 || $menu_selection == 7 || $menu_selection == 8 || $menu_selection == 12)
		{
			print "\nWould you like to compare the results to the CURRENT workfile (y/n)?\n>";
			$yes_no_compare_temp = <STDIN>;
			@yes_no_compare = split(/\n/,$yes_no_compare_temp);
			$yes_no_compare[0] = uc($yes_no_compare[0]);
			if ($yes_no_compare[0] ne "Y" && $yes_no_compare[0] ne "N")
			{
				print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>";
				<STDIN>;
				&main;	
				die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
			}
			if ($yes_no_compare[0] eq "Y")
			{
				print "\nPlease enter minimum difference\n>";
				$compare_difference_temp = <STDIN>;
				@compare_difference = split(/\n/,$compare_difference_temp);
				if ($compare_difference[0] < 0 || $compare_difference[0] > 100)
				{
					print "\n*** Invalid Threshold ***\n\n"; print "\n> Press <ENTER>";
					<STDIN>;
					&main;
					die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
				}
			}
		}
	}
	
	
	
	# Build @super_array from the workfile lines.  It is indexed as
	# $super_array[position][sequence], where "position" is the matching entry
	# of @pdb_nums and "sequence" counts the lines of @work_lines from 1.
	# $seq_length ends up as the length of the longest line.
	$seq_length = 0;
	$chars_count = 0;
	$work_line_count = 1;
	@super_array = ();

	foreach(@work_lines)
	{
		# Upper-cases the line in place (this modifies @work_lines itself).
		$_ =~ tr/a-z/A-Z/;
		@work_chars = split(//,$_);

		$chars_count = 1;
		$pdb_index_count = 0;
		foreach(@work_chars)
		{
			$super_array[($pdb_nums[$pdb_index_count])][$work_line_count] = $_;
			$chars_count++;
			$pdb_index_count++;
		}

		# Measure the line that was just read.  The check used to run before
		# the line was counted, so the last line was never measured and a
		# single-sequence workfile produced a sequence length of -1.
		if ($chars_count > $seq_length)
			{$seq_length = $chars_count;}

		$work_line_count++;
	}

	# $chars_count starts at 1, so the maximum is one more than the real
	# length; an empty workfile is reported as length 0.
	$seq_length = ($seq_length > 0) ? $seq_length - 1 : 0;

	# Selection 4: flag every sequence that has residue $aa_name[0] at
	# position $aa_loc[0]; all other sequences are unflagged.
	if ($menu_selection == 4)
	{
		$flag_count = 0;

		for ($c = 1; $c <= $num_proteins; $c++)
		{
			my $has_residue = ($super_array[$aa_loc[0]][$c] eq $aa_name[0]);

			$flagged[$c] = $has_residue ? 1 : 0;
			$flag_count++ if $has_residue;
		}

		$total_count = "$total_count -> $flag_count";
	}


	# Selection 5: include ("I") or exclude ("E") sequences by name.
	# $isolate_name[0] holds one name or a comma-separated list of names.
	# Each name is tried as written, with its first letter capitalised and
	# fully upper-cased.  Names are used as regular expressions, exactly as
	# before.  Results: @flagged, $flag_count, $total_count and
	# $output_name_total.
	if ($menu_selection == 5)
	{
		$isolate_eq = ucfirst($isolate_name[0]);
		$upper_isolate_eq = uc($isolate_name[0]);
		$flag_count = 0;
		$multiple_names = 0;

		if ($isolate_name[0] =~ /,/)
		{
			@isolation_names = split(/\,/,$isolate_name[0]);
			$multiple_names++;
			$multiple_outname = $isolate_name[0];
			$multiple_outname =~ s/,/./g;
		}

		if ($name_include_exclude eq "I" || $name_include_exclude eq "E")
		{
			my $including = ($name_include_exclude eq "I");
			my @patterns  = $multiple_names ? @isolation_names : ($isolate_name[0]);
			my $name_tag  = $multiple_names ? $multiple_outname : $isolate_name[0];
			my $matched   = 0;

			for ($c = 1; $c <= $num_proteins; $c++)
			{
				# Include mode starts from "nothing flagged", exclude mode
				# from "everything flagged".
				$flagged[$c] = $including ? 0 : 1;

				foreach my $pattern (@patterns)
				{
					$isolate_eq = ucfirst($pattern);
					$upper_isolate_eq = uc($pattern);

					if ($work_names[$c-1] =~ $pattern || $work_names[$c-1] =~ $isolate_eq || $work_names[$c-1] =~ $upper_isolate_eq)
					{
						$flagged[$c] = $including ? 1 : 0;
						$matched++;

						# Count each sequence once.  Previously a sequence
						# matching several listed names was counted once per
						# name, which inflated the include total and
						# deflated the exclude total.
						last;
					}
				}
			}

			if ($including)
			{
				$output_name_total = "$output_name[0]_$name_tag";
				$flag_count = $matched;
			}
			else
			{
				$output_name_total = "$output_name[0]_no.$name_tag";
				$flag_count = $num_proteins - $matched;
			}

			$total_count = "$total_count -> $flag_count";
		}

	}


	# Selection 6: unflag every sequence that has residue $aa_name[0] at
	# position $aa_loc[0]; all other sequences stay flagged.
	if ($menu_selection == 6)
	{
		my $removed = 0;

		for ($c = 1; $c <= $num_proteins; $c++)
		{
			if ($super_array[$aa_loc[0]][$c] eq $aa_name[0])
			{
				$flagged[$c] = 0;
				$removed++;
			}
			else
			{
				$flagged[$c] = 1;
			}
		}

		# $flag_count reports the sequences that remain.
		$flag_count = $num_proteins - $removed;
		$total_count = "$total_count -> $flag_count";
	}


	# Selection 7: unflag every sequence that contains residue $aa_name[0]
	# at any position; $flag_count reports the sequences that remain.
	if ($menu_selection == 7)
	{
		$flag_count = 0;

		for ($c = 1;$c <= $num_proteins;$c++)
		{
			$flagged[$c] = 1;
			$exclude_count = 0;

			for ($r = 0;$r < $seq_length ;$r++)
			{
				# @super_array is keyed by the entries of @pdb_nums, not by
				# the running column index.  Indexing with the raw index
				# scanned the wrong rows whenever the numbering does not
				# start at 0.
				if ($super_array[$pdb_nums[$r]][$c] eq $aa_name[0])
				{
					$exclude_count++;
				}
			}

			if ($exclude_count > 0)
			{
				$flag_count++;
				$flagged[$c] = 0;
			}
		}

		$flag_count = $num_proteins - $flag_count;
		$total_count = "$total_count -> $flag_count";
	}


	# Selection 9: for every alignment column count how many sequences carry
	# residue $aa_name[0].  A column is flagged when the count reaches the
	# threshold (mode T) or falls inside the lower/upper range (mode R).
	# @freq_flagged and @freq_nums are indexed by the 0-based column index.
	if ($menu_selection == 9)
	{
		@freq_nums = ();
		$num_freq_total = 0;

		for ($r = 0; $r < $seq_length; $r++)
		{
			$freq_flagged[$r] = 0;
			$flag_count = 0;

			for ($c = 1; $c <= $num_proteins; $c++)
			{
				$flag_count++ if $super_array[$pdb_nums[$r]][$c] eq $aa_name[0];
			}

			if ($range_threshold[0] =~ /T/ && $flag_count >= $freq_treshold[0])
			{
				$freq_nums[$r] = $flag_count;
				$freq_flagged[$r] = 1;
				$num_freq_total++;
			}

			if ($range_threshold[0] =~ /R/ && $flag_count >= $lower_range[0] && $flag_count <= $upper_range[0])
			{
				$freq_nums[$r] = $flag_count;
				$freq_flagged[$r] = 1;
				$num_freq_total++;
			}
		}
	}


	# Selection 8: flag the sequences in which residue $aa_name[0] occurs
	# either exactly $num_to_isolate[0] times (mode O) or a number of times
	# inside the lower/upper range (mode R).
	if ($menu_selection == 8)
	{
		$flag_count = 0;

		for ($c = 1;$c <= $num_proteins;$c++)
		{
			$flagged[$c] = 0;
			$exclude_count = 0;

			for ($r = 0;$r < $seq_length ;$r++)
			{
				# @super_array is keyed by the entries of @pdb_nums, not by
				# the running column index.  Indexing with the raw index
				# counted the wrong rows whenever the numbering does not
				# start at 0.
				if ($super_array[$pdb_nums[$r]][$c] eq $aa_name[0])
				{
					$exclude_count++;
				}
			}

			if ($range_occurrence[0] =~ /O/)
			{
				if ($exclude_count == $num_to_isolate[0])
				{
					$flag_count++;
					$flagged[$c] = 1;
				}
			}

			if ($range_occurrence[0] =~ /R/)
			{
				if ($exclude_count >= $lower_range[0] && $exclude_count <= $upper_range[0])
				{
					$flag_count++;
					$flagged[$c] = 1;
				}
			}
		}
		$total_count = "$total_count -> $flag_count";
	}

	
	
	# temp.log collects the names of the files written by this run.  The
	# three-argument open keeps unusual characters in the directory name from
	# being read as part of the open mode, and a failure is now reported
	# instead of silently discarding every later "print LOG".
	open(LOG, '>', "$work_dir_name/temp.log")
		or warn "Cannot write $work_dir_name/temp.log: $!\n";

	# Selections that work on the unfiltered workfile log its name first.
	if ($menu_selection == 3 || $menu_selection == 9 || $menu_selection == 11 || $menu_selection == 13)
	{
		print LOG "$work_file_name[0]\n";
	}

	
	if ($menu_selection == 11 || $menu_selection == 12)
	{
		my $finger_template;
		my @finger_temp_lines;
		my @defined_positions;
		my @defined_residues;
		my @temp_access_num_temp;
		my @access_num_temp;
		my @access_num;
		my @line_split;
		my $defined_positions_count;
		my $defined_residues_count;
		my @multi_array;
		my $entry_counter;
		my $fing_num;
		my $residue_string_length;
		my @multiple_residues;
		my $treshold_count;
		my @num_sorted_multi_array;
		my $sort_number;
		my $sort_increment;
		my $sort_endpoint;
		my @temp_1_exclusions;
		my @temp_2_exclusions;
		my @defined_exclusions;
		my $exclusion_control;
		my $exclusion_string_length;
		my $unflag_control;
		my @residue_display;
		my @labelled_residues_temp_1;
		my @labelled_residues_temp_2;
		my @labelled_residues;
		my $label_control;
		my $label_string_length;
		my @sorted_flagged;
		my @sort_number_counter;
		my @header_residues;
		my $exclude_tag;
		my $exclude_tag_control;
		
		
				
		# Read the fingerprint template with Perl's own I/O rather than
		# through `cat`: the shell call failed on paths containing spaces or
		# shell metacharacters and on systems without cat.  An unreadable
		# template is reported and treated as empty.
		$finger_template = "";
		if (open(my $template_fh, '<', "$work_dir_name/$finger_template_name"))
		{
			local $/;	# slurp the whole file in one read
			$finger_template = <$template_fh>;
			$finger_template = "" unless defined $finger_template;
			close $template_fh;
		}
		else
		{
			warn "Cannot read $work_dir_name/$finger_template_name: $!\n";
		}
		@finger_temp_lines = split(/\n/,$finger_template);

		$defined_positions_count = 0;
		$defined_residues_count = 0;
		$exclude_tag = "_excluded";
		$exclude_tag_control = 0;

		$sort_endpoint = 0;
		print "\n\nDefined Residues and Positions\nin <$work_dir_name/$finger_template_name>\n\n";
		
		# Parse the template one line at a time.  A line is either a bare
		# position, or "position,residues" optionally followed by a
		# "[...]" group (labelled residues) and/or a "{...}" group (excluded
		# residues).  Every line, with or without a comma, consumes one slot
		# (1-based) in @defined_positions, @defined_residues,
		# @labelled_residues and @defined_exclusions.
		foreach(@finger_temp_lines)
		{
			if ($_ =~ /\,/)
			{
				# Echo the line as written (text after the first comma
				# upper-cased) before the bracket groups are stripped below.
				@residue_display = split(/\,/,$_);
				$residue_display[1] = uc($residue_display[1]);
				printf "%8s", $residue_display[0];
				print "   $residue_display[1]\n"; 
			
				$exclusion_control = 0;
				$label_control = 0;
				
				# Cut off the "[...]" group.  $_ is truncated at the first
				# "[", which also rewrites this element of @finger_temp_lines.
				# NOTE(review): because of the truncation, a "{...}" group is
				# only recognised below when it stands BEFORE the "[...]"
				# group on the line - confirm against the template format.
				if ($_ =~ /\[/)
				{			
					@labelled_residues_temp_1 = split(/\[/,$_);
					$_ = $labelled_residues_temp_1[0];
					@labelled_residues_temp_2 = split(/\]/,$labelled_residues_temp_1[1]);
					$label_control = 1;
				}
				
				# Cut off the "{...}" group.  The first exclusion group seen
				# appends "_excluded" to the output name, once per template.
				if ($_ =~ /\{/)
				{			
					@temp_1_exclusions = split(/\{/,$_);
					$_ = $temp_1_exclusions[0];
					@temp_2_exclusions = split(/\}/,$temp_1_exclusions[1]);
					$exclusion_control = 1;
					if ($exclude_tag_control == 0)
					{
						$output_name_total = $output_name_total.$exclude_tag;
						$exclude_tag_control++;
					}
				}		
				
				# What is left is "position,residues".
				@line_split = split(/\,/,$_);
				
				$defined_positions_count++;
				# The table header shows the displayed text, bracket groups included.
				$header_residues[$defined_positions_count] = $residue_display[1];
				$defined_positions[$defined_positions_count] = $line_split[0];
				
				# Only positions that actually list residues count towards
				# $defined_residues_count.
				if (length($line_split[1]) > 0)
				{
					$line_split[1] = uc($line_split[1]);
					$defined_residues[$defined_positions_count] = $line_split[1];
					$defined_residues_count++;
				}
				else
				{
					$defined_residues[$defined_positions_count] = "";
				}
				
				if ($label_control == 1)
				{
					$labelled_residues_temp_2[0] = uc($labelled_residues_temp_2[0]);
					$labelled_residues[$defined_positions_count] = $labelled_residues_temp_2[0];
				}
				else
				{
					$labelled_residues[$defined_positions_count] = "";
				}
			
				if ($exclusion_control == 1)
				{
					$temp_2_exclusions[0] = uc($temp_2_exclusions[0]);
					$defined_exclusions[$defined_positions_count] = $temp_2_exclusions[0]
				}
				else
				{
					$defined_exclusions[$defined_positions_count] = "";
				}
				
			}
			else
			{
				# No comma: the whole line is taken as a position with no
				# residue, label or exclusion requirements.
				printf "%8s", $_;
				print "\n";
				$defined_positions_count++;
				$defined_positions[$defined_positions_count] = $_;
				$defined_residues[$defined_positions_count] = "";
				$labelled_residues[$defined_positions_count] = "";
				$defined_exclusions[$defined_positions_count] = "";
			}
		}
		
		# Seed one @multi_array row per sequence.  Column 0 is the accession,
		# taken from the sequence name as the text after the first "[" up to
		# the next "]"; columns 1..$defined_positions_count hold the residue
		# the sequence has at each template position.
		@multi_array = ();

		for ($c = 1; $c <= $num_proteins; $c++)
		{
			@temp_access_num_temp = split(/\[/, $work_names[$c-1]);
			@access_num_temp = split(/\]/, $temp_access_num_temp[1]);
			push @access_num, $access_num_temp[0];

			$multi_array[$c][0] = $access_num[$c-1];
			$entry_counter = 1;

			for ($r = 1; $r <= $defined_positions_count; $r++)
			{
				$multi_array[$c][$entry_counter++] = $super_array[$defined_positions[$r]][$c];
			}
		}
		
		$treshold_count = 0;

		# Selection 12 filters: start with nothing flagged and only rank
		# scores down to the requested minimum.
		if ($menu_selection == 12)
		{
			for ($c = 1; $c <= $num_proteins; $c++)
			{
				$flagged[$c] = 0;
			}
			$sort_endpoint = $fingerprint_restriction;
		}

		# Score every sequence against the template.  A residue that matches
		# a defined residue is wrapped in "(..)" and adds one to the score; a
		# residue that matches a labelled residue is wrapped in "[..]".  Once
		# a cell has been wrapped it no longer equals a bare residue letter,
		# so the later checks for that cell cannot match any more.
		for ($c = 1; $c <= $num_proteins; $c++)
		{
			$fing_num = 0;
			$unflag_control = 0;

			for ($r = 1; $r <= $defined_positions_count; $r++)
			{
				$residue_string_length = length($defined_residues[$r]);
				$exclusion_string_length = length($defined_exclusions[$r]);
				$label_string_length = length($labelled_residues[$r]);

				# Defined residues: one or several alternatives per position.
				if ($residue_string_length >= 1)
				{
					@multiple_residues = split(//, $defined_residues[$r]);

					foreach my $wanted (@multiple_residues)
					{
						next unless $wanted eq $multi_array[$c][$r];
						$multi_array[$c][$r] = "($multi_array[$c][$r])";
						$fing_num++;
					}
				}

				# Labelled residues are only marked, never scored.
				if ($label_string_length >= 1)
				{
					@multiple_residues = split(//, $labelled_residues[$r]);

					foreach my $labelled (@multiple_residues)
					{
						next unless $labelled eq $multi_array[$c][$r];
						$multi_array[$c][$r] = "[$multi_array[$c][$r]]";
					}
				}

				# Excluded residues veto the sequence (selection 12 only).
				if ($menu_selection == 12 && $exclusion_string_length >= 1)
				{
					@multiple_residues = split(//, $defined_exclusions[$r]);

					foreach my $banned (@multiple_residues)
					{
						$unflag_control = 1 if $banned eq $multi_array[$c][$r];
					}
				}
			}

			# The score is stored in the column after the last position.
			$multi_array[$c][$defined_positions_count+1] = $fing_num;

			if ($menu_selection == 12 && $multi_array[$c][$defined_positions_count+1] >= $fingerprint_restriction && $unflag_control == 0)
			{
				$treshold_count++;
				$flagged[$c] = 1;
			}
		}
		
		# Reorder the rows by descending score: one pass over all sequences
		# for every score from $defined_residues_count down to
		# $sort_endpoint.  For selection 12 only flagged sequences are kept.
		# $sort_number_counter[score] records how many rows had that score;
		# $sort_increment ends one past the last row written.
		$sort_number = $defined_residues_count;
		@num_sorted_multi_array = ();
		$sort_increment = 1;

		for ($c = $sort_endpoint; $c <= $sort_number; $c++)
		{
			$sort_number_counter[$c] = 0;
		}

		for (; $sort_number >= $sort_endpoint; $sort_number--)
		{
			for ($c = 1; $c <= $num_proteins; $c++)
			{
				next if $menu_selection == 12 && $flagged[$c] != 1;
				next unless $multi_array[$c][$defined_positions_count+1] == $sort_number;

				# Copy the whole row: accession, residues and score.
				for ($r = 0; $r <= $defined_positions_count+1; $r++)
				{
					$num_sorted_multi_array[$sort_increment][$r] = $multi_array[$c][$r];
				}
				$sort_number_counter[$sort_number]++;
				$sort_increment++;
			}
		}

		# From here on @multi_array holds the sorted rows.
		@multi_array = @num_sorted_multi_array;
		$sort_number = $defined_residues_count;

		if ($menu_selection == 12)
		{
			$total_count = "$total_count -> $treshold_count";
			$flag_count = $treshold_count;
		}
				
		$fing_num = "$fing_num ";

		# Single-character cells get a trailing blank so that they line up
		# with the three-character "(X)" and "[X]" cells in the "%5s" columns.
		foreach my $header_cell (@header_residues)
		{
			$residue_string_length = length($header_cell);
			$header_cell = "$header_cell " if $residue_string_length == 1;
		}

		for ($c = 1; $c <= $num_proteins; $c++)
		{
			for ($r = 1; $r <= $defined_positions_count; $r++)
			{
				$residue_string_length = length($multi_array[$c][$r]);
				$multi_array[$c][$r] = "$multi_array[$c][$r] " if $residue_string_length == 1;
			}
		}

		# Start the fingerprint table: title lines, then one heading row with
		# the template positions and one with the template residues.
		open(OUT, ">$work_dir_name/$output_name_total.finger_table");

		print OUT "Date: $date\n";
		print OUT "PDB ID: $output_name_total\nNumber of proteins: $total_count\n\n\n";

		print OUT "  Access";
		for ($r = 1; $r <= $defined_positions_count; $r++)
		{
			printf OUT "%5s", $defined_positions[$r];
		}
		print OUT "   NUM\n        ";

		for ($r = 1; $r <= $defined_positions_count; $r++)
		{
			printf OUT "%5s", $header_residues[$r];
		}
		print OUT "\n\n";
			
		# Write the table rows: accession, one cell per template position,
		# then the score.  Selection 12 prints only the sorted, flagged rows.
		# $sort_increment is one past the last filled row, so the final row
		# is $sort_increment - 1; the old "<= $sort_increment" bound printed
		# an extra blank row at the end of the table.
		my $last_table_row = ($menu_selection == 12) ? $sort_increment - 1 : $num_proteins;

		for ($c = 1; $c <= $last_table_row; $c++)
		{
			printf OUT "%8s", $multi_array[$c][0];
			for ($r = 1; $r <= $defined_positions_count + 1; $r++)
			{
				printf OUT "%5s", $multi_array[$c][$r];
			}
			print OUT "\n";
		}
		
		#########################################################################
#		
#		my $acyl_count  = 0;
#		my @count_array_pos;
#		my @count_array_neg;
#		my $ucfirst_temp;
#		
#		$ucfirst_temp = ucfirst($temp_name[0]);
#		$sort_number = $defined_residues_count;
#		
#		for ($c = $sort_endpoint; $c <= $sort_number; $c++)	
#		{
#			$count_array_pos[$c] = 0;
#			$count_array_neg[$c] = 0;
#		}
#		
#		
#		if ($menu_selection == 12)
#		{	
#			for ($c = 1;$c < $sort_increment;$c++)	
#			{
#				for ($r = 1; $r <= $num_proteins; $r++)
#				{
#					if ($work_names[$r-1] =~ $multi_array[$c][0])
#					{
#						for ($sort_number; $sort_number >= $sort_endpoint; $sort_number--)
#						{
#							if ($multi_array[$c][$defined_positions_count+1] == $sort_number)
#							{
#								if ($work_names[$r-1] =~ /$temp_name[0]/ || $work_names[$r-1] =~ /$ucfirst_temp/)
#									{$count_array_pos[$sort_number]++;}
#								else
#									{$count_array_neg[$sort_number]++;}
#							}							
#						}
#						
#						$sort_number = $defined_residues_count;
#					}
#				}
#			}
#		}
#
#
		###########################################################################
		
		
		
		$temp_name[0] = uc($temp_name[0]);

		# Histogram: for every score from the highest possible down to
		# $sort_endpoint, the number of rows that reached it.
		print OUT "\n\n\n   NUM    Number of Proteins\n";
#		printf OUT "%15s", $temp_name[0];
#		print OUT "                     ELSE\n";

		for ($c = $sort_number; $c >= $sort_endpoint; $c--)
		{
			printf OUT "%5s%13s\n", $c, $sort_number_counter[$c];
#			printf OUT "%25s", $count_array_pos[$c];
#			printf OUT "%25s", $count_array_neg[$c];
		}
		 
		
		
		
		# Amino-acid composition at every template position.  Selection 11
		# counts all sequences, selection 12 only the flagged ones.
		# %aanum maps a residue to its column (1..20); $nocc counts only
		# residues that have such a column.
		for ($r = 1; $r <= $defined_positions_count; $r++)
		{
			my $position = $defined_positions[$r];

			for ($c = 1; $c <= $num_proteins; $c++)
			{
				next unless $menu_selection == 11 || ($menu_selection == 12 && $flagged[$c] == 1);

				$aacount[$position][$aanum{$super_array[$position][$c]}]++;
				$nocc[$position]++ if ($aanum{$super_array[$position][$c]});
			}

			for ($c = 1; $c <= 20; $c++)
			{
				$percents[$position][$c] = ($aacount[$position][$c] / $nocc[$position] * 100) if ($nocc[$position]);
			}
		}

		# Percentage table: one column per template position, one row per
		# amino acid.
		print OUT "\n\n\n\n     ";
		for ($r = 1; $r <= $defined_positions_count; $r++)
		{
			printf OUT "%5s", $defined_positions[$r];
		}
		print OUT "\n";

		for ($c = 1; $c <= 20; $c++)
		{
			printf OUT "%5s", $aacid[$c];
			for ($r = 1; $r <= $defined_positions_count; $r++)
			{
				printf OUT "%5.0f", $percents[$defined_positions[$r]][$c];
			}
			print OUT "\n";
		}

		# Clear the shared counters so later sections start from scratch.
		@percents = ();
		@nocc = ();
		@aacount = ();

		close OUT;
		
		
		# Sorted name list: the full sequence names in the order of the
		# sorted table.  Always written for selection 11, and for selection
		# 12 when a list was requested.  For selection 12 the original index
		# of every sorted row is also remembered in @sorted_flagged.
		if ($menu_selection == 12 || $menu_selection == 11)
		{
			my $write_list = ($menu_selection == 11 || $yes_no_list[0] eq "Y");
			my $row_limit  = ($menu_selection == 11) ? $num_proteins : $sort_increment - 1;

			$output_count = 1;
			$page_num = 1;
			$page_num_total = ($menu_selection == 11) ? ceil($num_proteins / 55) : ceil($flag_count / 55);

			if ($write_list)
			{
				open(EX, ">$work_dir_name/$output_name_total.sorted_name_list");

				print EX "Date: $date                                            Page: $page_num / $page_num_total \n";
				print EX "PDB ID: $output_name_total\nNumber of proteins: $total_count\nSequence Length: $seq_length\n\n";
			}

			for ($c = 1; $c <= $row_limit; $c++)
			{
				for ($r = 1; $r <= $num_proteins; $r++)
				{
					# The accession of the sorted row is matched, as a
					# pattern, against every sequence name.
					next unless $work_names[$r-1] =~ $multi_array[$c][0];

					print EX "$work_names[$r-1]\n" if $write_list;
					$sorted_flagged[$c] = $r if $menu_selection == 12;
				}
			}

			if ($write_list)
			{
				close EX;
				print LOG "$output_name_total.sorted_name_list\n";
			}
		}
		
	
		
		
		# CLUSTALW-style output of the sorted rows, in segments of 60 columns.
		# Every segment has a ruler line (position numbers that are multiples
		# of ten), a marker line ("|" under those positions) and one line per
		# sorted row: the accession padded to 14 characters, then the residues.
		if($yes_no_clustalw eq "Y")
		{
			open(CLUST, '>', "$work_dir_name/$output_name_total.clustalw")
				or warn "Cannot write $work_dir_name/$output_name_total.clustalw: $!\n";
			$clust_length_of_line = 60;
			$num_clust_segments = ceil($seq_length / $clust_length_of_line);
			$segment_start = 1;
			$segment_stop = $clust_length_of_line;
			$section_marker = "|";

			# Clamp the first segment too.  It used to run to column 60 even
			# for shorter alignments, reading past the end of @pdb_nums and
			# printing spurious "|" markers.
			if ($segment_stop > $seq_length)
				{$segment_stop = $seq_length;}

			for ($d = 1; $d <= $num_clust_segments; $d++)
			{
				# Ruler line.  $space_counter tracks the next column that
				# still needs a filler blank, so a multi-digit number uses up
				# the blanks of the columns that follow it.
				print CLUST "              ";

				$internal_counter = 0;
				$space_counter = 1;

				for($r = $segment_start; $r <= $segment_stop; $r++)
				{
					$internal_counter++;

					$mod_result = $pdb_nums[$r-1] % 10;
					if ($mod_result == 0)
					{
						print CLUST $pdb_nums[$r-1];
						$length_position = length($pdb_nums[$r-1]);
						$space_counter = $space_counter + $length_position;
					}
					elsif ($space_counter == $internal_counter)
					{
						print CLUST " ";
						$space_counter++;
					}
				}
				print CLUST "\n";

				# Marker line.
				print CLUST "              ";
				for($r = $segment_start; $r <= $segment_stop; $r++)
				{
					$mod_result = $pdb_nums[$r-1] % 10;
					print CLUST (($mod_result == 0) ? $section_marker : " ");
				}

				for ($c = 1;$c < $sort_increment;$c++)
				{
					# Pad the accession to the 14-character label column.
					# Only lengths 5-8 were padded before, which misaligned
					# every other accession; "%-14s" leaves longer accessions
					# and already padded ones unchanged.
					$accession_length = length($multi_array[$c][0]);
					$multi_array[$c][0] = sprintf("%-14s", defined $multi_array[$c][0] ? $multi_array[$c][0] : "");

					print CLUST "\n$multi_array[$c][0]";

					for($r = $segment_start; $r <= $segment_stop; $r++)
					{
						# Blanks and dots become gap characters; the change is
						# written back into @super_array.
						if ($super_array[$pdb_nums[$r-1]][$sorted_flagged[$c]] eq " " || $super_array[$pdb_nums[$r-1]][$sorted_flagged[$c]] eq ".")
						{
							$super_array[$pdb_nums[$r-1]][$sorted_flagged[$c]] = "-";
						}

						print CLUST $super_array[$pdb_nums[$r-1]][$sorted_flagged[$c]];
					}
				}

				$segment_start = $segment_stop + 1;
				$segment_stop = $segment_stop + $clust_length_of_line;

				if ($segment_stop > $seq_length)
					{$segment_stop = $seq_length;}

				print CLUST "\n\n\n\n";

			}
			close CLUST;
			print LOG "$output_name_total.clustalw\n";
		}
		
		print LOG "$output_name_total.finger_table\n";
			
	}
	
	
	
	# Selection 13: write the accession of every sequence (the text after the
	# first "[" of its name, up to the next "]") to "<output>.accession",
	# one per line and without a newline after the last one.
	if ($menu_selection == 13)
	{
		$line_counter = 0;
		$accession_counter = 0;

		# @access_num is a package variable that this block only appended
		# to.  Without the reset, a second run in the same session wrote the
		# accessions of the earlier run again and put the line breaks in the
		# wrong places.
		@access_num = ();

		for ($c = 1; $c <= $num_proteins; $c++)
		{
			@temp_access_num_temp = split(/\[/,$work_names[$c-1]);
			@access_num_temp = split(/\]/,$temp_access_num_temp[1]);
			push @access_num, $access_num_temp[0];
			$accession_counter++;
		}

		open(ACCESS_NUMS, '>', "$work_dir_name/$output_name_total.accession")
			or warn "Cannot write $work_dir_name/$output_name_total.accession: $!\n";

		foreach(@access_num)
		{
			print ACCESS_NUMS $_;
			$line_counter++;
			# Newline between entries only, none after the last one.
			if ($line_counter < $accession_counter)
			{
				print ACCESS_NUMS "\n";
			}
		}

		close ACCESS_NUMS;
		print LOG "$output_name_total.accession\n";
	}
	
	
	
		
	# Residue frequencies per alignment column for the table output.
	#   - filtered selections (4-8, 12) count flagged sequences into @aacount;
	#   - when a comparison was requested, every sequence is also counted
	#     into @compare_aacount;
	#   - selection 3 without comparison counts every sequence into @aacount.
	# %aanum maps a residue to its column (1..20); the *nocc arrays count
	# only residues that have such a column.
	if ($menu_selection == 3 || $menu_selection == 4 || $menu_selection == 5 || $menu_selection == 6 || $menu_selection == 7 || $menu_selection == 8 || $menu_selection == 12)
	{
		for ($r = 1; $r <= $seq_length; $r++)
		{
			my $position = $pdb_nums[$r-1];

			for ($c = 1; $c <= $num_proteins; $c++)
			{
				if ($menu_selection != 3 && $flagged[$c] == 1)
				{
					$aacount[$position][$aanum{$super_array[$position][$c]}]++;
					$nocc[$position]++ if ($aanum{$super_array[$position][$c]});
				}

				if ($yes_no_compare[0] eq "Y")
				{
					$compare_aacount[$position][$aanum{$super_array[$position][$c]}]++;
					$compare_nocc[$position]++ if ($aanum{$super_array[$position][$c]});
				}
				elsif ($menu_selection == 3)
				{
					$aacount[$position][$aanum{$super_array[$position][$c]}]++;
					$nocc[$position]++ if ($aanum{$super_array[$position][$c]});
				}
			}

			# Turn the counts of this column into percentages.
			for ($c = 1; $c <= 20; $c++)
			{
				$percents[$position][$c] = ($aacount[$position][$c] / $nocc[$position] * 100) if ($nocc[$position]);

				if ($yes_no_compare[0] eq "Y")
				{
					$compare_percents[$position][$c] = ($compare_aacount[$position][$c] / $compare_nocc[$position] * 100) if ($compare_nocc[$position]);
				}
			}
		}
	}

	

###########################################################################
#	Generating Table 
###########################################################################

	# Percentage table: one row per alignment column, one column per amino
	# acid, followed by the number of counted residues.  The page heading is
	# written again whenever the row counter reaches 55.
	if ($yes_no_table[0] eq "Y")
	{
		$output_count = 1;
		$page_num = 1;
		$page_num_total = ceil($seq_length / 53);

		open(OUTPUT, ">$work_dir_name/$output_name_total.table");

		# Everything in a page heading below the "Date ... Page" line.
		my $print_column_heading = sub
		{
			print OUTPUT "PDB ID: $output_name_total\nNumber of proteins: $total_count\nSequence Length: $seq_length\n\n";
			printf OUTPUT "  # ";
			for ($c = 1; $c <= 20; $c++)
			{
				printf OUTPUT "%4s",$aacid[$c]." ";
			}
			printf OUTPUT " Total\n";
		};

		print OUTPUT "Date: $date                                            Page: $page_num / $page_num_total \n";
		$print_column_heading->();

		for ($r = 1; $r <= $seq_length; $r++)
		{
			if ($output_count == 55)
			{
				$page_num++;
				print OUTPUT "Date: $date                                            Page: $page_num / $page_num_total\n";
				$print_column_heading->();
				$output_count = 1;
			}

			printf OUTPUT "%3s", $pdb_nums[$r-1];
			for ($c = 1; $c <= 20; $c++)
			{
				printf OUTPUT "%4.0f" , $percents[$pdb_nums[$r-1]][$c];
			}
			print OUTPUT "  $nocc[$pdb_nums[$r-1]]\n";
			$output_count++;
		}

		print LOG "$output_name_total.table\n";
		close OUTPUT;
	}


###########################################################################
#	Generating Name List 
###########################################################################


	# Plain name list.  Selections 4-8 list only the flagged sequences, every
	# other selection lists all of them.  The page heading is written again
	# whenever the line counter reaches 56.
	if ($yes_no_list[0] eq "Y" && $yes_no_seq[0] eq "N" && $menu_selection != 12)
	{
		my $flagged_only = grep { $menu_selection == $_ } (4, 5, 6, 7, 8);

		$output_count = 1;
		$page_num = 1;
		$page_num_total = ($menu_selection == 3) ? ceil($num_proteins / 55) : ceil($flag_count / 55);

		open LIST, ">$work_dir_name/$output_name_total.names";

		print LIST "Date: $date                                            Page: $page_num / $page_num_total \n";
		print LIST "PDB ID: $output_name_total\nNumber of proteins: $total_count\nSequence Length: $seq_length\n\n";

		for ($c = 1; $c <= $num_proteins; $c++)
		{
			if ($output_count == 56)
			{
				$page_num++;
				print LIST "Date: $date                                            Page: $page_num / $page_num_total\n";
				print LIST "PDB ID: $output_name_total\nNumber of proteins: $total_count\nSequence Length: $seq_length\n\n";
				$output_count = 1;
			}

			next if $flagged_only && $flagged[$c] != 1;

			print LIST "$work_names[$c-1]\n";
			$output_count++;
		}

		print LOG "$output_name_total.names\n";
		close LIST;
	}


###########################################################################
#	Generating Name List with Sequences
###########################################################################


	# Name list with sequences.  Every entry is the sequence name (padded to
	# 57 characters) with the number of word characters in the sequence,
	# followed by the sequence in lines of 70 residues grouped in tens.
	# Selections 4-8 and 12 list only the flagged sequences.  Pages hold 55
	# lines; an entry that no longer fits starts a new page.
	#
	# This replaces four copies of the same entry printer and fixes two
	# defects of the copies:
	#   - a page break was written even when the current sequence was not
	#     flagged, which could leave an empty page at the end of the file;
	#   - two copies used $a as a loop counter, clobbering the package
	#     variable that sort() uses.
	if ($yes_no_list[0] eq "Y" && $yes_no_seq[0] eq "Y")
	{
		$output_count = 0;
		$page_num = 1;

		my $lines_per_entry  = 2 + ceil($seq_length / 70);
		my $entries_per_page = ceil(55 / $lines_per_entry);
		my $entry_total      = ($menu_selection == 3) ? $num_proteins : $flag_count;
		my $flagged_only     = grep { $menu_selection == $_ } (4, 5, 6, 7, 8, 12);

		# Page total as estimated by the original code.
		$page_num_total = ceil($entry_total / $entries_per_page);
		$page_increment = ceil($page_num_total / $entries_per_page);
		$page_num_total = $page_num_total + $page_increment;

		open SEQ, ">$work_dir_name/$output_name_total.names_seq";

		print SEQ "Date: $date                                            Page: $page_num / $page_num_total \n";
		print SEQ "PDB ID: $output_name_total\nNumber of proteins: $total_count\nSequence Length: $seq_length\n\n";

		for ($c = 1; $c <= $num_proteins; $c++)
		{
			# Skip unselected sequences before any page handling.
			next if $flagged_only && $flagged[$c] != 1;

			# Entry does not fit: fill the page up and start the next one.
			if ($output_count + $lines_per_entry > 55)
			{
				for ($d = $output_count; $d < 55; $d++)
				{
					print SEQ "\n";
				}

				$page_num++;

				print SEQ "Date: $date                                            Page: $page_num / $page_num_total\n";
				print SEQ "PDB ID: $output_name_total\nNumber of proteins: $total_count\nSequence Length: $seq_length\n\n";
				$output_count = 0;
			}

			# Length of the sequence without gap characters.
			$length_each_protein = 0;
			for ($r = 1; $r <= $seq_length; $r++)
			{
				if ($super_array[$pdb_nums[$r-1]][$c] =~ /\w/)
					{$length_each_protein++;}
			}

			# Pad the name to 57 characters; as before, the padded name is
			# written back into @work_names.
			$protein_name_length = length($work_names[$c-1]);
			$work_names[$c-1] = sprintf("%-57s", defined $work_names[$c-1] ? $work_names[$c-1] : "");

			print SEQ "$work_names[$c-1]                   Length: $length_each_protein\n   ";

			$chars_out_count_newline = 0;
			$chars_out_count_space = 0;
			for ($r = 1; $r <= $seq_length; $r++)
			{
				if ($chars_out_count_newline == 70)
				{
					print SEQ "\n  ";
					$chars_out_count_newline = 0;
				}
				if ($chars_out_count_space == 10)
				{
					print SEQ " ";
					$chars_out_count_space = 0;
				}
				print SEQ $super_array[$pdb_nums[$r-1]][$c];
				$chars_out_count_newline++;
				$chars_out_count_space++;
			}
			print SEQ "\n\n";

			$output_count = $output_count + $lines_per_entry;
		}

		print LOG "$output_name_total.names_seq\n";
		close SEQ;
	}
	
	
	
###########################################################################
#	Generating CLUSTALW output
###########################################################################


	# CLUSTALW-style alignment output.
	#
	# Writes $work_dir_name/$output_name_total.clustalw in segments of
	# $clust_length_of_line alignment columns.  Every segment consists of
	#   - a ruler line: the position number is printed wherever the entry of
	#     @pdb_nums is a multiple of 10,
	#   - a marker line: $section_marker under those same columns,
	#   - one row per protein: the accession found between "[" and "]" in
	#     @work_names, followed by that protein's cells of @super_array.
	# For menu selections 4-8 only proteins with $flagged[$c] == 1 are written.
	#
	# Side effects visible outside this section: blank and "." cells of
	# @super_array are rewritten to "-" (for every protein, flagged or not),
	# and @access_num is left holding the padded accession strings.
	if($yes_no_clustalw eq "Y" && $menu_selection != 12)
	{
		my $clust_path = "$work_dir_name/$output_name_total.clustalw";
		open(CLUST, '>', $clust_path) or die "Cannot open $clust_path for writing: $!";

		# Width of the name column; matches the 14 blanks that prefix the
		# ruler and marker lines below.
		my $name_field_width = 14;

		$clust_length_of_line = 60;
		$num_clust_segments = ceil($seq_length / $clust_length_of_line);
		$segment_start = 1;
		$segment_stop = $clust_length_of_line;

		# Clamp the FIRST segment as well: without this an alignment shorter
		# than one line would be read past the end of @pdb_nums and spurious
		# markers would be printed for columns that do not exist.
		if ($segment_stop > $seq_length)
			{$segment_stop = $seq_length;}

		$section_marker = "|";

		# Menu selections 4-8 restrict the output to flagged proteins.
		my $only_flagged = ($menu_selection == 4 || $menu_selection == 5 || $menu_selection == 6 || $menu_selection == 7 || $menu_selection == 8);

		# Start from an empty list so that rows are always indexed against the
		# accessions of the current @work_names, never against leftovers of an
		# earlier pass through this section.
		@access_num = ();

		for ($c = 1; $c <= $num_proteins; $c++)
		{
			# The accession is the text between the first "[" and the next "]".
			@temp_access_num_temp = split(/\[/,$work_names[$c-1]);
			@access_num_temp = split(/\]/,$temp_access_num_temp[1]);
			push @access_num, $access_num_temp[0];
		}

		for ($d = 1; $d <= $num_clust_segments; $d++)
		{
			# ---- ruler line ------------------------------------------------
			print CLUST "              ";

			# $internal_counter is the 1-based column within the segment;
			# $space_counter is the next column that still needs output.  A
			# printed position number advances $space_counter by its own
			# length, so the blanks that would follow it are suppressed until
			# the column counter has caught up.
			$internal_counter = 0;
			$space_counter = 1;

			for($r = $segment_start; $r <= $segment_stop; $r++) 
			{
				$internal_counter++;

				$mod_result = $pdb_nums[$r-1] % 10;
				if ($mod_result == 0)
				{
					print CLUST $pdb_nums[$r-1];
					$length_position = length($pdb_nums[$r-1]);
					$space_counter = $space_counter + $length_position;
				}
				else
				{
					if ($space_counter == $internal_counter)
					{
						print CLUST " ";
						$space_counter++;
					}
				}
			}
			print CLUST "\n";

			# ---- marker line -----------------------------------------------
			print CLUST "              ";
			for($r = $segment_start; $r <= $segment_stop; $r++) 
			{
				$mod_result = $pdb_nums[$r-1] % 10;
				if ($mod_result == 0)
				{
					print CLUST $section_marker; 
				}
				else
				{
					print CLUST " ";
				}
			}

			# ---- one row per protein ---------------------------------------
			for ($c = 1;$c <= $num_proteins; $c++)
			{
				# Left-justify the accession in the name column.  Any length
				# shorter than the column is padded (not only 5-8 characters),
				# so longer accessions no longer run into the sequence.  The
				# padded value is stored back, which makes this a no-op in
				# later segments.
				$accession_length = length($access_num[$c-1]);
				if (defined $access_num[$c-1] && $accession_length < $name_field_width)
				{
					$access_num[$c-1] = sprintf("%-*s", $name_field_width, $access_num[$c-1]);
				}

				my $write_row = (!$only_flagged || $flagged[$c] == 1);

				if ($write_row)
				{
					print CLUST "\n$access_num[$c-1]";
				}

				for($r = $segment_start; $r <= $segment_stop; $r++) 
				{
					# Gaps are normalised to "-" in @super_array itself, and
					# this is done for unflagged proteins too.
					if ($super_array[$pdb_nums[$r-1]][$c] eq " " || $super_array[$pdb_nums[$r-1]][$c] eq ".")
					{
						$super_array[$pdb_nums[$r-1]][$c] = "-";
					}

					if ($write_row)
					{
						print CLUST $super_array[$pdb_nums[$r-1]][$c];
					}
				}
			}

			# Advance to the next segment, never running past the alignment.
			$segment_start = $segment_stop + 1;
			$segment_stop = $segment_stop + $clust_length_of_line;

			if ($segment_stop > $seq_length)
				{$segment_stop = $seq_length;}

			print CLUST "\n\n\n\n";

		}
		close CLUST;
		print LOG "$output_name_total.clustalw\n";
	}
	
	

###########################################################################
#	Generating Fingerprint
###########################################################################

	# Fingerprint report.
	#
	# Writes $work_dir_name/$output_name_total.finger with four parts:
	#   1. one row (amino acid, position, percent) for every position/amino-acid
	#      pair whose entry in @percents is at least $finger_min[0] and whose
	#      @nocc value is at least half of $flag_count; a page header is
	#      repeated after every 53 rows;
	#   2. the amino-acid makeup of those fingerprint rows, printed as ten
	#      "tRNA II / tRNA I" pairs plus totals;
	#   3. total amino-acid usage summed from @aacount over all positions,
	#      split into the same two groups and sorted by descending count;
	#   4. for every protein, the set of amino acids that never occurs in its
	#      row of @super_array, tallied by identical sets.
	if ($yes_no_finger[0] eq "Y")
	{
		my @finger_residues;
		$output_count = 0;
		$page_num = 1;
		$num_of_finger = 0;

		# Percentage of $part in $whole; 0 when $whole is 0, so an empty
		# fingerprint or an empty count table cannot abort the run with
		# "Illegal division by zero".
		my $percent_of = sub
		{
			my ($part, $whole) = @_;
			return $whole ? $part / $whole * 100 : 0;
		};

		# First pass: count the rows so the page total is known up front.
		for ($r = 1;$r <= $seq_length;$r++)
		{
			for($c = 1;$c <= 20;$c++)
			{
				if ($percents[$pdb_nums[$r-1]][$c] >= $finger_min[0] && $nocc[$pdb_nums[$r-1]] >= (0.5 * $flag_count))
				{
					$num_of_finger++;
				}
			}
		}

		$page_num_total = ceil($num_of_finger / 53);

		my $finger_path = "$work_dir_name/$output_name_total.finger";
		open(FINGER, '>', $finger_path) or die "Cannot open $finger_path for writing: $!";

		print FINGER "Date: $date                                            Page: $page_num / $page_num_total \n";
		print FINGER "PDB ID: $output_name_total\nNumber of proteins: $total_count\nSequence Length: $seq_length\nTreshold: $finger_min[0]%\n\n";
		print FINGER " AA	Position       % \n";

		# Second pass: print the rows and remember each residue for part 2.
		for ($r = 1;$r <= $seq_length;$r++)
		{
			for($c = 1;$c <= 20;$c++)
			{
				if ($percents[$pdb_nums[$r-1]][$c] >= $finger_min[0] && $nocc[$pdb_nums[$r-1]] >= (0.5 * $flag_count))
				{
					print FINGER " $aacid[$c]";
					printf FINGER "%10.0f",$pdb_nums[$r-1];
					printf FINGER "%12.0f",$percents[$pdb_nums[$r-1]][$c];
					print FINGER "\n";
					$output_count++;
					push @finger_residues, $aacid[$c];

					if ($output_count == 53)
					{
						$page_num++;

						print FINGER "Date: $date                                            Page: $page_num / $page_num_total\n";
						print FINGER "PDB ID: $output_name_total\nNumber of proteins: $total_count\nSequence Length: $seq_length\nTreshold: $finger_min[0]%\n\n";
						print FINGER " AA	Position       % \n";

						$output_count = 0;
					}
				}
			}
		}

		print LOG "$output_name_total.finger\n";


		# Per-amino-acid totals of @aacount over all positions.
		my @aa_sum_array;
		my $aa_sum_total = 0;
		my $aa_percent;

		for ($c = 1;$c <= 20; $c++) 
		{
			$aa_sum_array[$c] = 0;

			for ($r = 1;$r <= $seq_length;$r++)
				{$aa_sum_array[$c] += $aacount[$pdb_nums[$r-1]][$c];}
			$aa_sum_total += $aa_sum_array[$c];
		}


		my @control;
		my $control_count;
		my $missing_residues = "";
		my %miss_counts;
		my @unsort_finger_I;
		my @sort_finger_I; 
		my @unsort_finger_II;
		my @sort_finger_II; 
		my @aa_and_finger;
		my $trnaI_sum = 0;
		my $trnaI_percent =0;
		my $missing_aa_test;
		my $missing_aa_count;
		my $tot_finger_I=0;

		my $tot_finger=0;

		# Indexes into @aacid that this report places in the tRNA I group;
		# every other index in 1..20 is reported as tRNA II.
		my %is_trnaI = map { $_ => 1 } (1, 2, 3, 4, 6, 7, 13, 15, 17, 18);


		####### Part 2: amino-acid makeup of the fingerprint rows
		print FINGER "\n AA makeup of Fingerprint at the current threshold\n";
		print FINGER "tRNA II #   %   tRNAI #   %\n";

		# @aacid indexes in print order: each pair is (tRNA II, tRNA I), so
		# @aacid itself does not have to be reordered.
		my @fingerprint_makeup = ("9","1","8","2","11","3","12","18","20","15","16","17","19","4","10","7","5","13","14","6"); 
		$c=0;

		$tot_finger = @finger_residues;

		foreach my $makeup_index (@fingerprint_makeup)
		{
			$missing_aa_test = $makeup_index;
			$missing_aa_count = 0;

			# Count the fingerprint rows that belong to this amino acid.
			foreach my $residue (@finger_residues)
			{
				if ($aacid[$missing_aa_test] =~ /$residue/)
				{
					$missing_aa_count++;
				}
			}
			printf FINGER "%4.1s %4d %4.1f", $aacid[$missing_aa_test], $missing_aa_count, $percent_of->($missing_aa_count, $tot_finger);

			# $c counts the columns of the current output line; the second
			# column of every line is the tRNA I member of the pair.
			$c++;
			if($c==2)
			{
				$tot_finger_I += $missing_aa_count;
				print FINGER "\n";
				$c=0;
			}
		}
		printf FINGER "    =%4d %4.1f    =%4d %4.1f \nTotal Residue Makeup %4d",$tot_finger-$tot_finger_I, $percent_of->($tot_finger-$tot_finger_I, $tot_finger), $tot_finger_I, $percent_of->($tot_finger_I, $tot_finger), $tot_finger;

		####### Part 3: total amino-acid usage
		print FINGER "\n\n\nTotal amino acid usage:\n\nAA	#	      %	       AA	#	    %\n";

		for ($c = 0;$c <= 20; $c++) 
		{
			# @control is zeroed here for part 4; slot 0 collects every
			# character that has no entry in %aanum.
			$control[$c] = 0;

			# Only indexes 1..20 are amino acids.  The entries are pushed so
			# both lists hold exactly ten "count name" strings; empty slots
			# would tie with zero counts in the numeric sort below and could
			# push a real amino acid out of the ten printed rows.
			next if $c == 0;

			if ($is_trnaI{$c})
			{
				push @unsort_finger_I, $aa_sum_array[$c]." ".$aacid[$c];
				$trnaI_sum =  $aa_sum_array[$c] + $trnaI_sum;
			}
			else
			{
				push @unsort_finger_II, $aa_sum_array[$c]." ".$aacid[$c];
			}
		}

		# Numeric comparison uses the leading count of each "count name" string.
		@sort_finger_I = sort{$b <=> $a} @unsort_finger_I;
		@sort_finger_II = sort{$b <=> $a} @unsort_finger_II;

		for ($c = 0;$c <= 9; $c++) 
		{
			@aa_and_finger = split(/\s+/,$sort_finger_I[$c]);
			$aa_percent = $percent_of->($aa_and_finger[0], $aa_sum_total);
			print FINGER " $aa_and_finger[1]"; 
			printf FINGER "%10.0f", $aa_and_finger[0];
			printf FINGER "%12.2f", $aa_percent;

			@aa_and_finger = split(/\s+/,$sort_finger_II[$c]);
			$aa_percent = $percent_of->($aa_and_finger[0], $aa_sum_total);
			print FINGER "\t$aa_and_finger[1]"; 
			printf FINGER "%10.0f", $aa_and_finger[0];
			printf FINGER "%12.2f", $aa_percent;
			print FINGER "\n";
		}

		######## Group totals for tRNA I and tRNA II
		$trnaI_percent = $percent_of->($trnaI_sum, $aa_sum_total);
		print FINGER "\n\ttRNAI: \t\t\t\ttRNAII:\n";
		printf FINGER "%10.0f", $trnaI_sum;
		printf FINGER "%12.2f", $trnaI_percent;

		printf FINGER "\t\t%10.0f", $aa_sum_total-$trnaI_sum;
		printf FINGER "%12.2f\n", ($aa_sum_total ? 100-$trnaI_percent : 0);

		######## Part 4: amino acids missing from each protein
		for ($c = 1; $c <= $num_proteins; $c++)
		{
			$r = 1;
			$control_count = 0;
			$missing_residues = "";

			# Tally the residues of protein $c.  The number of distinct amino
			# acids seen is re-evaluated every 60 columns and at the last
			# column, and the scan stops early once all 20 have been seen.
			while($r <= $seq_length && $control_count != 20)
			{
				$control[$aanum{$super_array[$pdb_nums[$r-1]][$c]}]++;

				if (($r % 60) == 0 || $r == $seq_length)
				{
					$control_count = 0;

					for ($d = 1;$d <= 20; $d++) 
					{
						if ($control[$d] != 0)
							{$control_count++;}
					}
				}

				$r++;
			}

			if ($control_count != 20)
			{
				# The key is the concatenation of the missing amino acids in
				# @aacid order, so identical sets share one counter.
				for ($d = 1;$d <= 20; $d++) 
				{
					if ($control[$d] == 0)
						{$missing_residues = $missing_residues . $aacid[$d];}
				}

				$miss_counts{$missing_residues}++;
			}

			for ($d = 1;$d <= 20; $d++) 
			{
				$control[$d] = 0;
			}
		}

		print FINGER  "\n\n\nMutually Exclusive Missing Amino Acids:\n\n";

		# Sorted keys make the report reproducible from run to run; plain
		# hash iteration order is not.
		foreach my $key (sort keys %miss_counts)
		{
			printf FINGER "%10s",$key; 
			print FINGER "   $miss_counts{$key}\n";
		}

		close FINGER;

	}



###########################################################################
#	Generating Comparison Output
###########################################################################

	# Comparison report.
	#
	# Writes $work_dir_name/$output_name_total.comparison with one row for
	# every position/amino-acid pair whose value in @percents exceeds the value
	# in @compare_percents by at least $compare_difference[0].  Each row shows
	# the amino acid, the position, the @compare_percents value ("Before"), the
	# @percents value ("After"), their difference and the @nocc value of the
	# position.  A page header is repeated after every 53 rows.
	if ($yes_no_compare[0] eq "Y")
	{
		$output_count = 0;
		$page_num = 1;
		$num_of_compare = 0;

		# First pass: count the rows so the page total is known up front.
		for ($r = 1;$r <= $seq_length;$r++)
		{
			for($c = 1;$c <= 20;$c++)
			{
				if (($percents[$pdb_nums[$r-1]][$c] - $compare_percents[$pdb_nums[$r-1]][$c]) >= $compare_difference[0])
				{
					$num_of_compare++;
				}
			}
		}

		$page_num_total = ceil($num_of_compare / 53);

		my $compare_path = "$work_dir_name/$output_name_total.comparison";
		open(COMPARE, '>', $compare_path) or die "Cannot open $compare_path for writing: $!";

		# Title shown in the page headers.  It is kept in its own variable so
		# that $output_name_total never has to be modified and restored.
		my $compare_title = "$output_name_total compared to $output_name[0]";

		print COMPARE "Date: $date                                            Page: $page_num / $page_num_total \n";
		print COMPARE "PDB ID: $compare_title\nNumber of proteins: $total_count\nSequence Length: $seq_length\nMinimum Difference: $compare_difference[0]%\n\n";
		print COMPARE " AA	Position          Before(%)       After(%)    Difference(%)     Total\n";

		for ($r = 1;$r <= $seq_length;$r++)
		{
			for($c = 1;$c <= 20;$c++)
			{
				if (($percents[$pdb_nums[$r-1]][$c] - $compare_percents[$pdb_nums[$r-1]][$c]) >= $compare_difference[0])
				{
					# Start a new page only when another row really follows a
					# full page; this avoids a dangling "Page N+1 / N" header
					# when the row count is an exact multiple of 53.
					if ($output_count == 53)
					{
						$page_num++;

						print COMPARE "Date: $date                                            Page: $page_num / $page_num_total\n";
						print COMPARE "PDB ID: $compare_title\nNumber of proteins: $total_count\nSequence Length: $seq_length\nMinimum Difference: $compare_difference[0]%\n\n";
						print COMPARE " AA	Position          Before(%)       After(%)    Difference(%)     Total\n";

						$output_count = 0;
					}

					# The position is left-justified in a five-character field
					# so the numeric columns stay aligned for any position
					# width (including values of 1000 and above).
					print COMPARE " $aacid[$c]         ";
					printf COMPARE "%-5s",$pdb_nums[$r-1];
					printf COMPARE "%15.0f",$compare_percents[$pdb_nums[$r-1]][$c];
					printf COMPARE "%15.0f",$percents[$pdb_nums[$r-1]][$c];
					printf COMPARE "%15.0f",($percents[$pdb_nums[$r-1]][$c] - $compare_percents[$pdb_nums[$r-1]][$c]);
					printf COMPARE "%15.0f",$nocc[$pdb_nums[$r-1]];
					print COMPARE "\n";
					$output_count++;
				}
			}
		}

		print LOG "$output_name_total.comparison\n";
		close COMPARE;

	}




###########################################################################
#	Species Output
###########################################################################

	if ($yes_no_species[0] eq "Y")
	{
		my @temp_access_num_temp;
		my @access_num_temp;
		my @access_num;
		my $access_length;
		my $tax_htmldoc;
		my @html_format_temp;
		my @species_unformatted_temp;
		my @species_unformatted;
		my @species;
		my @kingdom_unformatted_temp;
		my @kingdom_unformatted;
		my @kingdom;
		my @cross_link_temp;
		my @cross_link;
		my @bacteria;
		my @archaea;
		my @eukaryota;
		my @viroids;
		my @viruses;
		my @others;
		my @bacteria_count;
		my @archaea_count;
		my @eukaryota_count;
		my @viroids_count;
		my @viruses_count;
		my @others_count;
		my $bacteria_total;
		my $archaea_total;
		my $eukaryota_total;
		my $viroids_total;
		my $viruses_total;
		my $others_total;
		my $total_total;
		my $species_found;
		my $bacteria_name_total;
		my $archaea_name_total;
		my $eukaryota_name_total;
		my $viroids_name_total;
		my $viruses_name_total;
		my $others_name_total;
		my @sort_split;
		my @sort_temp;
		my $num_length;
		my $total_name_total;	
		my @tax_line;
		my $access_deleted;
		my $access_code;
		my @access_species;
		my $temp_access_name;
		
			
		
		for ($c = 1; $c <= $num_proteins; $c++)
		{
			if ($menu_selection == 4 || $menu_selection == 5 || $menu_selection == 6 || $menu_selection == 7 || $menu_selection == 8 || $menu_selection == 12)
			{
				if ($flagged[$c] == 1)
				{
					@temp_access_num_temp = split(/\[/,$work_names[$c-1]);
					@access_num_temp = split(/\]/,$temp_access_num_temp[1]);
					push @access_num, $access_num_temp[0];
				}
			}
			else
			{
				@temp_access_num_temp = split(/\[/,$work_names[$c-1]);
				@access_num_temp = split(/\]/,$temp_access_num_temp[1]);
				push @access_num, $access_num_temp[0];
			}

			$bacteria_count[$c-1] = 0;
			$archaea_count[$c-1] = 0;
			$eukaryota_count[$c-1] = 0;
			$viroids_count[$c-1] = 0;
			$viruses_count[$c-1] = 0;
			$others_count[$c-1] = 0;
		}
		
		$bacteria_total = 0;
		$archaea_total = 0;
		$eukaryota_total = 0;
		$viroids_total = 0;
		$viruses_total = 0;
		$others_total = 0;
		$bacteria_name_total = 0;
		$archaea_name_total = 0;
		$eukaryota_name_total = 0;
		$viroids_name_total = 0;
		$viruses_name_total = 0;
		$others_name_total = 0;
				
		$c = 1;
		print "\n";
		
		foreach(@access_num)
		{
			$access_code = $_;
			$access_length = length($access_code);
			$tax_htmldoc = get "http://us.expasy.org/cgi-bin/niceprot.pl?$access_code";
			
			
			$access_deleted = 0;
			@tax_line = split (/\n/, $tax_htmldoc);
			foreach (@tax_line)
			{
				if ($_ =~ "This TrEMBL entry has been deleted")
				{$access_deleted++;}
				$kingdom[0] = "ERROR";
				$species[0] = "This TrEMBL entry has been deleted [$access_code]";
			}

			if ($access_length == 6 && $access_deleted == 0)
			{				
				@html_format_temp = split(/Taxonomy/, $tax_htmldoc);
				
				@species_unformatted_temp = split(/OC\=/,$html_format_temp[1]);
				@species_unformatted = split(/\>/,$species_unformatted_temp[1]);
				@species = split(/\</,$species_unformatted[1]);

				@kingdom_unformatted_temp = split(/OC\=/,$html_format_temp[3]);
				@kingdom_unformatted = split(/\>/,$kingdom_unformatted_temp[1]);
				@kingdom = split(/\</,$kingdom_unformatted[1]);
				
			}
			if ($access_length == 8 && $access_deleted == 0)
			{
				if ($tax_htmldoc =~ /PRELIMINARY/)
				{
					@species_unformatted = split(/OS   /,$tax_htmldoc);
					@species = split(/\./,$species_unformatted[1]);

					@kingdom_unformatted = split(/OC   /,$tax_htmldoc);
					@kingdom = split(/\;/,$kingdom_unformatted[1]);
				}
				else
				{
					@cross_link_temp = split(/HREF\=\"\/cgi\-bin\/niceprot\.pl\?/, $tax_htmldoc);
					@cross_link = split(/\"/, $cross_link_temp[1]);
					$tax_htmldoc = get "http://us.expasy.org/cgi-bin/niceprot.pl?$cross_link[0]";

					@html_format_temp = split(/Taxonomy/, $tax_htmldoc);
					@species_unformatted_temp = split(/OC\=/,$html_format_temp[1]);
					@species_unformatted = split(/\>/,$species_unformatted_temp[1]);
					@species = split(/\</,$species_unformatted[1]);

					@kingdom_unformatted_temp = split(/OC\=/,$html_format_temp[3]);
					@kingdom_unformatted = split(/\>/,$kingdom_unformatted_temp[1]);
					@kingdom = split(/\</,$kingdom_unformatted[1]);
				}

			}
			
			if ($kingdom[0] =~ /Bacteria/)
			{
				$r = 0;
				$species_found = 0;
				$temp_access_name = "$_,Bacteria";
				push @access_species, $temp_access_name;
				
				foreach(@bacteria)
				{
					if ($_ eq $species[0])
					{
						$species_found = 1;
						$bacteria_count[$r]++;
					}
					$r++;
				}
				if ($species_found == 0)
				{
					push @bacteria, $species[0];
					$bacteria_count[$r]++;	
					$bacteria_name_total++;
				}
			}
			elsif ($kingdom[0] =~ /Archaea/)
			{
				$r = 0;
				$species_found = 0;
				$temp_access_name = "$_,Archaea";
				push @access_species, $temp_access_name;
				
				foreach(@archaea)
				{
					if ($_ eq $species[0])
					{
						$species_found = 1;
						$archaea_count[$r]++;
					}
					$r++;
				}
				if ($species_found == 0)
				{
					push @archaea, $species[0];
					$archaea_count[$r]++;
					$archaea_name_total++;	
				}
			}
			elsif ($kingdom[0] =~ /Eukaryota/)
			{
				$r = 0;
				$species_found = 0;
				$temp_access_name = "$_,Eukaryota";
				push @access_species, $temp_access_name;
				
				foreach(@eukaryota)
				{
					if ($_ eq $species[0])
					{
						$species_found = 1;
						$eukaryota_count[$r]++;
					}
					$r++;
				}
				if ($species_found == 0)
				{
					push @eukaryota, $species[0];
					$eukaryota_count[$r]++;	
					$eukaryota_name_total++;	
				}
			}
			elsif ($kingdom[0] =~ /Viroids/)
			{
				$r = 0;
				$species_found = 0;
				$temp_access_name = "$_,Viroids";
				push @access_species, $temp_access_name;
				
				foreach(@viroids)
				{
					if ($_ eq $species[0])
					{
						$species_found = 1;
						$viroids_count[$r]++;
					}
					$r++;
				}
				if ($species_found == 0)
				{
					push @viroids, $species[0];
					$viroids_count[$r]++;	
					$viroids_name_total++;	
				}
			}
			elsif ($kingdom[0] =~ /Viruses/)
			{
				$r = 0;
				$species_found = 0;
				$temp_access_name = "$_,Viruses";
				push @access_species, $temp_access_name;
				
				foreach(@viruses)
				{
					if ($_ eq $species[0])
					{
						$species_found = 1;
						$viruses_count[$r]++;
					}
					$r++;
				}
				if ($species_found == 0)
				{
					push @viruses, $species[0];
					$viruses_count[$r]++;	
					$viruses_name_total++;	
				}
			}
			else
			{
				$r = 0;
				$species_found = 0;
				$temp_access_name = "$_,Others";
				push @access_species, $temp_access_name;
				
				foreach(@others)
				{
					if ($_ eq $species[0])
					{
						$species_found = 1;
						$others_count[$r]++;
					}
					$r++;
				}
				if ($species_found == 0)
				{
					push @others, $species[0];
					$others_count[$r]++;
					$others_name_total++;	
				}
			}
			
			printf "%5s",$c; 
			printf "%11s",$_; 
			printf "%16s",$kingdom[0]; 
  			print "     $species[0]\n";
			$c++;
		}

		foreach(@bacteria_count)
		{
			$bacteria_total += $_;
			$num_length = length($_);
			if ($num_length == 1)
				{$_ = "$_   ";}
			if ($num_length == 2)
				{$_ = "$_  ";}
			if ($num_length == 3)
				{$_ = "$_ ";}
		}
		foreach(@archaea_count)
		{
			$archaea_total += $_;
			$num_length = length($_);
			if ($num_length == 1)
				{$_ = "$_   ";}
			if ($num_length == 2)
				{$_ = "$_  ";}
			if ($num_length == 3)
				{$_ = "$_ ";}
		}
		foreach(@eukaryota_count)
		{
			$eukaryota_total += $_;
			$num_length = length($_);
			if ($num_length == 1)
				{$_ = "$_   ";}
			if ($num_length == 2)
				{$_ = "$_  ";}
			if ($num_length == 3)
				{$_ = "$_ ";}
		}
		foreach(@viroids_count)
		{
			$viroids_total += $_;
			$num_length = length($_);
			if ($num_length == 1)
				{$_ = "$_   ";}
			if ($num_length == 2)
				{$_ = "$_  ";}
			if ($num_length == 3)
				{$_ = "$_ ";}
		}
		foreach(@viruses_count)
		{
			$viruses_total += $_;
			$num_length = length($_);
			if ($num_length == 1)
				{$_ = "$_   ";}
			if ($num_length == 2)
				{$_ = "$_  ";}
			if ($num_length == 3)
				{$_ = "$_ ";}
		}
		foreach(@others_count)
		{	$others_total += $_;
			$num_length = length($_);
			if ($num_length == 1)
				{$_ = "$_   ";}
			if ($num_length == 2)
				{$_ = "$_  ";}
			if ($num_length == 3)
				{$_ = "$_ ";}
		}

		$total_total = $bacteria_total + $archaea_total + $eukaryota_total + $others_total + $viroids_total + $viruses_total;
		$total_name_total = $bacteria_name_total + $archaea_name_total + $eukaryota_name_total + $others_name_total + $viroids_name_total + $viruses_name_total;

		
		if ($menu_selection != 19)
		{
			open SPECIES, ">$work_dir_name/$output_name_total.species";

			$output_count = 0;
			$page_num = 1;
			$page_num_total = ceil($total_name_total / 52);
	
			print SPECIES "Date: $date                                            Page: $page_num / $page_num_total \n";
			print SPECIES "PDB ID: $output_name_total\nNumber of proteins: $total_count\nNumber of species found: $total_total\n\n";
	
			if ($bacteria_total > 0)
			{
				if ($output_count > 40)
				{
					for ($r = $output_count; $r <= 54; $r++)
						{print SPECIES "\n";}
					$output_count = 4;
					$page_num++;
					print SPECIES "Date: $date                                            Page: $page_num / $page_num_total \n";
					print SPECIES "PDB ID: $output_name_total\nNumber of proteins: $total_count\nNumber of species found: $total_total\n\n";
				
				}
				else
				{
					$output_count = $output_count + 4;
				}

				@temp = ();
				@sort_temp = ();
				@sort_split = ();

				if ($species_sort[0] eq "F")
				{	
					$r = 0;
					foreach(@bacteria)
					{
						push @temp , "$bacteria_count[$r]     $_"; 
						$r++;
					}
					@bacteria = ();
					@bacteria = sort{$b <=> $a} @temp;
				}
				if ($species_sort[0] eq "N")
				{	
					$r = 0;
					foreach(@bacteria)
					{
						push @temp , "$_          $bacteria_count[$r]"; 
						$r++;
					}
					
					@sort_temp = sort{$a cmp $b} @temp;
					@bacteria = ();
					foreach(@sort_temp)
					{
						@sort_split = split(/\s\s\s\s\s\s\s\s\s\s/,$_);
						push @bacteria, "$sort_split[1]     $sort_split[0]";
					}
				}

				print SPECIES "\n\nBACTERIA: $bacteria_total found \n-----------------------------------------------\n";
			
				for ($r = 0; $r < $bacteria_name_total; $r++)
				{
			
					if ($output_count <= 54)
					{
						print SPECIES "$bacteria[$r]\n";
						$output_count++;
					
					}
				
					else
					{
						$page_num++;
						print SPECIES "Date: $date                                            Page: $page_num / $page_num_total \n";
						print SPECIES "PDB ID: $output_name_total\nNumber of proteins: $total_count\nNumber of species found: $total_total\n\n";
						$output_count = 1;
						print SPECIES "$bacteria[$r]\n";									
					}
				}
			}

			if ($archaea_total > 0) 
			{
			
				if ($output_count > 40)
				{
					
					for ($r = $output_count; $r <= 54; $r++)
						{print SPECIES "\n";}
					$output_count = 4;
					$page_num++;
					print SPECIES "Date: $date                                            Page: $page_num / $page_num_total \n";
					print SPECIES "PDB ID: $output_name_total\nNumber of proteins: $total_count\nNumber of species found: $total_total\n\n";
						
				}
				else
				{
					$output_count = $output_count + 4;
				}
			
				@temp = ();
				@sort_temp = ();
				@sort_split = ();												

				if ($species_sort[0] eq "F")
				{	
					$r = 0;
					foreach(@archaea)
					{
						push @temp , "$archaea_count[$r]     $_"; 
						$r++;
					}
					@archaea = ();
					@archaea = sort{$b <=> $a} @temp;											

				}
				if ($species_sort[0] eq "N")
				{	
					$r = 0;
					foreach(@archaea)
					{
						push @temp , "$_          $archaea_count[$r]"; 
						$r++;
					}
					
					@sort_temp = sort{$a cmp $b} @temp;
					@archaea = ();
					foreach(@sort_temp)
					{
						@sort_split = split(/\s\s\s\s\s\s\s\s\s\s/,$_);
						push @archaea, "$sort_split[1]     $sort_split[0]";
					}
				}
	
				print SPECIES "\n\nARCHAEA: $archaea_total found \n-----------------------------------------------\n";
				
				for ($r = 0; $r < $archaea_name_total; $r++)
				{
				
					if ($output_count <= 54)
					{
						print SPECIES "$archaea[$r]\n";
						$output_count++;
						
					}
				
					else
					{
						$page_num++;
						print SPECIES "Date: $date                                            Page: $page_num / $page_num_total \n";
						print SPECIES "PDB ID: $output_name_total\nNumber of proteins: $total_count\nNumber of species found: $total_total\n\n";
						$output_count = 1;
						print SPECIES "$archaea[$r]\n";									
					}
				}
			}	
	
			if ($eukaryota_total > 0)
			{
				if ($output_count > 40)
				{
					for ($r = $output_count; $r <= 54; $r++)
						{print SPECIES "\n";}
					$output_count = 4;
					$page_num++;
					print SPECIES "Date: $date                                            Page: $page_num / $page_num_total \n";
					print SPECIES "PDB ID: $output_name_total\nNumber of proteins: $total_count\nNumber of species found: $total_total\n\n";
						
				}
				else
				{
					$output_count = $output_count + 4;
				}		
	
				@temp = ();
				@sort_temp = ();
				@sort_split = ();	
	
				if ($species_sort[0] eq "F")
				{	
					$r = 0;
					foreach(@eukaryota)
					{
						push @temp , "$eukaryota_count[$r]     $_"; 
						$r++;
					}
					@eukaryota = ();
					@eukaryota = sort{$b <=> $a} @temp;	
	
				}
				if ($species_sort[0] eq "N")
				{	
					$r = 0;
					foreach(@eukaryota)
					{
						push @temp , "$_          $eukaryota_count[$r]"; 
						$r++;
					}
					
					@sort_temp = sort{$a cmp $b} @temp;
					@eukaryota = ();
					foreach(@sort_temp)
					{
						@sort_split = split(/\s\s\s\s\s\s\s\s\s\s/,$_);
						push @eukaryota, "$sort_split[1]     $sort_split[0]";
					}
				}		
	
				print SPECIES "\n\nEUKARYOTA: $eukaryota_total found \n-----------------------------------------------\n";
				
				for ($r = 0; $r < $eukaryota_name_total; $r++)
				{
				
					if ($output_count <= 54)
					{
						print SPECIES "$eukaryota[$r]\n";
						$output_count++;
						
					}
				
					else
					{
						$page_num++;
						print SPECIES "Date: $date                                            Page: $page_num / $page_num_total \n";
						print SPECIES "PDB ID: $output_name_total\nNumber of proteins: $total_count\nNumber of species found: $total_total\n\n";
						$output_count = 1;
						print SPECIES "$eukaryota[$r]\n";									
					}
				}
			}	
	
			if ($viroids_total > 0)
			{	
				if ($output_count > 40)
				{
					for ($r = $output_count; $r <= 54; $r++)
						{print SPECIES "\n";}
					$output_count = 4;
					$page_num++;
					print SPECIES "Date: $date                                            Page: $page_num / $page_num_total \n";
					print SPECIES "PDB ID: $output_name_total\nNumber of proteins: $total_count\nNumber of species found: $total_total\n\n";
						
				}
				else
				{
					$output_count = $output_count + 4;
				}		
	
				@temp = ();
				@sort_temp = ();
				@sort_split = ();		
	
				if ($species_sort[0] eq "F")
				{	
					$r = 0;
					foreach(@viroids)
					{
						push @temp , "$viroids_count[$r]     $_"; 
						$r++;
					}
					@viroids = ();
					@viroids = sort{$b <=> $a} @temp;	
	
				}
				if ($species_sort[0] eq "N")
				{	
					$r = 0;
					foreach(@viroids)
					{
						push @temp , "$_          $viroids_count[$r]"; 
						$r++;
					}
					
					@sort_temp = sort{$a cmp $b} @temp;
					@viroids = ();
					foreach(@sort_temp)
					{
						@sort_split = split(/\s\s\s\s\s\s\s\s\s\s/,$_);
						push @viroids, "$sort_split[1]     $sort_split[0]";
					}
				}	
	
				print SPECIES "\n\nVIROIDS: $viroids_total found \n-----------------------------------------------\n";
				
				for ($r = 0; $r < $viroids_name_total; $r++)
				{
				
					if ($output_count <= 54)
					{
						print SPECIES "$viroids[$r]\n";
						$output_count++;
						
					}
				
					else
					{
						$page_num++;
						print SPECIES "Date: $date                                            Page: $page_num / $page_num_total \n";
						print SPECIES "PDB ID: $output_name_total\nNumber of proteins: $total_count\nNumber of species found: $total_total\n\n";
						$output_count = 1;
						print SPECIES "$viroids[$r]\n";									
					}
				}
			}	
	
			if ($viruses_total > 0)
			{
				if ($output_count > 40)
				{
					for ($r = $output_count; $r <= 54; $r++)
						{print SPECIES "\n";}
					$output_count = 4;
					$page_num++;
					print SPECIES "Date: $date                                            Page: $page_num / $page_num_total \n";
					print SPECIES "PDB ID: $output_name_total\nNumber of proteins: $total_count\nNumber of species found: $total_total\n\n";
						
				}
				else
				{
					$output_count = $output_count + 4;
				}	
	
				@temp = ();
				@sort_temp = ();
				@sort_split = ();		
	
				if ($species_sort[0] eq "F")
				{	
					$r = 0;
					foreach(@viruses)
					{
						push @temp , "$viruses_count[$r]     $_"; 
						$r++;
					}
					@viruses = ();
					@viruses = sort{$b <=> $a} @temp;		
	
				}
				if ($species_sort[0] eq "N")
				{	
					$r = 0;
					foreach(@viruses)
					{
						push @temp , "$_          $viruses_count[$r]"; 
						$r++;
					}
					
					@sort_temp = sort{$a cmp $b} @temp;
					@viruses = ();
					foreach(@sort_temp)
					{
						@sort_split = split(/\s\s\s\s\s\s\s\s\s\s/,$_);
						push @viruses, "$sort_split[1]     $sort_split[0]";
					}
				}		
	
				print SPECIES "\n\nVIRUSES: $viruses_total found \n-----------------------------------------------\n";
				
				for ($r = 0; $r < $viruses_name_total; $r++)
				{
				
					if ($output_count <= 54)
					{
						print SPECIES "$viruses[$r]\n";
						$output_count++;
						
					}
				
					else
					{
						$page_num++;
						print SPECIES "Date: $date                                            Page: $page_num / $page_num_total \n";
						print SPECIES "PDB ID: $output_name_total\nNumber of proteins: $total_count\nNumber of species found: $total_total\n\n";
						$output_count = 1;
						print SPECIES "$viruses[$r]\n";									
					}
				}
			}	
	
			if ($others_total > 0)
			{
				if ($output_count > 40)
				{
					for ($r = $output_count; $r <= 54; $r++)
						{print SPECIES "\n";}
					$output_count = 4;
					$page_num++;
					print SPECIES "Date: $date                                            Page: $page_num / $page_num_total \n";
					print SPECIES "PDB ID: $output_name_total\nNumber of proteins: $total_count\nNumber of species found: $total_total\n\n";
						
				}
				else
				{
					$output_count = $output_count + 4;
				}	
	
				@temp = ();
				@sort_temp = ();
				@sort_split = ();	
	
				if ($species_sort[0] eq "F")
				{	
					$r = 0;
					foreach(@others)
					{
						push @temp , "$others_count[$r]     $_"; 
						$r++;
					}
					@others = ();
					@others = sort{$b <=> $a} @temp;		
	
				}
				if ($species_sort[0] eq "N")
				{	
					$r = 0;
					foreach(@others)
					{
						push @temp , "$_          $others_count[$r]"; 
						$r++;
					}
					
					@sort_temp = sort{$a cmp $b} @temp;
					@others = ();
					foreach(@sort_temp)
					{
						@sort_split = split(/\s\s\s\s\s\s\s\s\s\s/,$_);
						push @others, "$sort_split[1]     $sort_split[0]";
					}
				}		
	
				print SPECIES "\n\nOTHERS: $others_total found \n-----------------------------------------------\n";
				
				for ($r = 0; $r < $others_name_total; $r++)
				{
				
					if ($output_count <= 54)
					{
						print SPECIES "$others[$r]\n";
						$output_count++;
						
					}
				
					else
					{
						$page_num++;
						print SPECIES "Date: $date                                            Page: $page_num / $page_num_total \n";
						print SPECIES "PDB ID: $output_name_total\nNumber of proteins: $total_count\nNumber of species found: $total_total\n\n";
						$output_count = 1;
						print SPECIES "$others[$r]\n";									
					}
				}
			}
		
			close SPECIES;
			print LOG "$output_name_total.species\n";
		}
		else
		{
			my @temp_species;
			my $append;
			my $bacteria_check = 0;
			my $archaea_check = 0;
			my $eukaryota_check = 0;
			my $viroids_check = 0;
			my $viruses_check = 0;
			my $others_check = 0;
			
			print "\n\n";
			
			if ($bacteria_total > 0)
			{	
				$append = "_bactaria.workfile";
				open BACTERIA, ">$work_dir_name/$output_name_total$append";
				print "\nGenerating $work_dir_name/$output_name_total$append";
			}
			
			if ($archaea_total > 0)
			{	
				$append = "_archaea.workfile";
				open ARCHAEA, ">$work_dir_name/$output_name_total$append";
				print "\nGenerating $work_dir_name/$output_name_total$append";
			}
			
			if ($eukaryota_total > 0)
			{	
				$append = "_eukaryota.workfile";
				open EUKARYOTA, ">$work_dir_name/$output_name_total$append";
				print "\nGenerating $work_dir_name/$output_name_total$append";
			}
			
			if ($viroids_total > 0)
			{	
				$append = "_viroids.workfile";
				open VIROIDS, ">$work_dir_name/$output_name_total$append";
				print "\nGenerating $work_dir_name/$output_name_total$append";
			}
			
			if ($viruses_total > 0)
			{	
				$append = "_viruses.workfile";
				open VIRUSES, ">$work_dir_name/$output_name_total$append";
				print "\nGenerating $work_dir_name/$output_name_total$append";
			}
			if ($others_total > 0)
			{	
				$append = "_others.workfile";
				open OTHERS, ">$work_dir_name/$output_name_total$append";
				print "\nGenerating $work_dir_name/$output_name_total$append";
			}
		
			print "\n";		
		
			for ($r = 1; $r <= $num_proteins; $r++)
			{
				@temp_species = split(/\,/,$access_species[$r-1]);
								
				if ($temp_species[1] eq "Bacteria")
				{
					$bacteria_check++;
				
					print BACTERIA ">$work_names[$r-1]\n";
					
					for ($c = 1; $c <= $seq_length; $c++)
					{
						print BACTERIA "$super_array[$c][$r]";	
					}
					print BACTERIA "\n";
						
					if ($bacteria_check == $bacteria_total)
					{
						print BACTERIA "\n";
		
						foreach(@pdb_nums)
						{
							print BACTERIA "&& $_\n"; 
						}

						print BACTERIA "\n";
	
						print BACTERIA "<< $bacteria_total";
						close BACTERIA;
					}
				}
				elsif ($temp_species[1] eq "Archaea")
				{
					$archaea_check++;
				
					print ARCHAEA ">$work_names[$r-1]\n";
					
					for ($c = 1; $c <= $seq_length; $c++)
					{
						print ARCHAEA "$super_array[$c][$r]";	
					}
					print ARCHAEA "\n";
						
					if ($archaea_check == $archaea_total)
					{		     
						print ARCHAEA "\n";
		
						foreach(@pdb_nums)
						{
							print ARCHAEA "&& $_\n"; 
						}

						print ARCHAEA "\n";
	
						print ARCHAEA "<< $archaea_total";
						close ARCHAEA;
					}
				}
				elsif ($temp_species[1] eq "Eukaryota")
				{	
					$eukaryota_check++;
				
					print EUKARYOTA ">$work_names[$r-1]\n";
					
					for ($c = 1; $c <= $seq_length; $c++)
					{
						print EUKARYOTA "$super_array[$c][$r]";	
					}
					print EUKARYOTA "\n";
						
					if ($eukaryota_check == $eukaryota_total)
					{
						print EUKARYOTA "\n";
		
						foreach(@pdb_nums)
						{
							print EUKARYOTA "&& $_\n"; 
						}

						print EUKARYOTA "\n";
	
						print EUKARYOTA "<< $eukaryota_total";
						close EUKARYOTA;
					}
				}
				elsif ($temp_species[1] eq "Viroids")
				{
					$viroids_check++;
				
					print VIROIDS ">$work_names[$r-1]\n";
					
					for ($c = 1; $c <= $seq_length; $c++)
					{
						print VIROIDS "$super_array[$c][$r]";	
					}
					print VIROIDS "\n";
						
					if ($viroids_check == $viroids_total)
					{
						print VIROIDS "\n";
		
						foreach(@pdb_nums)
						{
							print VIROIDS "&& $_\n"; 
						}

						print VIROIDS "\n";
	
						print VIROIDS "<< $viroids_total";
						close VIROIDS;
					}
				}
				elsif ($temp_species[1] eq "Viruses")
				{
					$viruses_check++;
				
					print VIRUSES ">$work_names[$r-1]\n";
					
					for ($c = 1; $c <= $seq_length; $c++)
					{
						print VIRUSES "$super_array[$c][$r]";	
					}
					print VIRUSES "\n";
						
					if ($viruses_check == $viruses_total)
					{
						print VIRUSES "\n";
		
						foreach(@pdb_nums)
						{
							print VIRUSES "&& $_\n"; 
						}

						print VIRUSES "\n";
	
						print VIRUSES "<< $viruses_total";
						close VIRUSES;
					}
				}
				elsif ($temp_species[1] eq "Others")
				{
					$others_check++;
				
					print OTHERS ">$work_names[$r-1]\n";
					
					for ($c = 1; $c <= $seq_length; $c++)
					{
						print OTHERS "$super_array[$c][$r]";	
					}
					print OTHERS "\n";
						
					if ($others_check == $others_total)
					{
						print OTHERS "\n";
		
						foreach(@pdb_nums)
						{
							print OTHERS "&& $_\n"; 
						}

						print OTHERS "\n";
	
						print OTHERS "<< $others_total";
						close OTHERS;
					}
				}
			}
		
			&end_menu_function(3);
		}			

	}

###########################################################################
#	Generating new Workfile
###########################################################################


	if ($menu_selection == 4 || $menu_selection == 5 || $menu_selection == 6 || $menu_selection == 7 || $menu_selection == 8 || $menu_selection == 12)
	{
		open WORK, ">$work_dir_name/$output_name_total.workfile";
		
		for ($r = 1; $r <= $num_proteins; $r++)
		{
			if ($flagged[$r] == 1)
			{	
				print WORK ">$work_names[$r-1]\n";
			
				for ($c = 1; $c <= $seq_length; $c++)
				{
					print WORK "$super_array[$c][$r]";	
				}
				print WORK "\n";
			}
		}
		print WORK "\n";
	
		foreach(@pdb_nums)
		{
			print WORK "&& $_\n"; 
		}

		print WORK "\n";
	
		print WORK "<< $total_count";

		print LOG "$output_name_total.workfile\n";
		close WORK;
	}


###########################################################################
#	Generating Frequency Output
###########################################################################



	if ($menu_selection == 9)
	{
		open FREQ, ">$work_dir_name/$output_name_total.frequency";

		$output_count = 0;
		$page_num = 1;
		$page_num_total = ceil($num_freq_total / 52);

		if ($range_threshold[0] =~ /T/)
		{ 
			print FREQ "Date: $date                                            Page: $page_num / $page_num_total\n";
			print FREQ "PDB ID: $output_name[0]\nNumber of proteins: $num_proteins\nFrequency of: $aa_name[0]\nTreshold: $freq_treshold[0]\nSequence Length: $seq_length\n\n";
			print FREQ "Location     Frequency\n";
		}

		if ($range_threshold[0] =~ /R/)
		{ 
			print FREQ "Date: $date                                            Page: $page_num / $page_num_total\n";
			print FREQ "PDB ID: $output_name[0]\nNumber of proteins: $num_proteins\nFrequency of: $aa_name[0]\nRange: $lower_range[0] - $upper_range[0]\nSequence Length: $seq_length\n\n";
			print FREQ "Location     Frequency\n";
		}	

		for ($r = 0;$r < $seq_length ;$r++)
		{
			if ($output_count == 52)
			{
				$page_num++;
				if ($range_threshold[0] =~ /T/)
				{ 
					print FREQ "Date: $date                                            Page: $page_num / $page_num_total\n";
					print FREQ "PDB ID: $output_name[0]\nNumber of proteins: $num_proteins\nFrequency of: $aa_name[0]\nTreshold: $freq_treshold[0]\nSequence Length: $seq_length\n\n";
					print FREQ "Location     Frequency\n";
				}

				if ($range_threshold[0] =~ /R/)
				{ 
					print FREQ "Date: $date                                            Page: $page_num / $page_num_total\n";
					print FREQ "PDB ID: $output_name[0]\nNumber of proteins: $num_proteins\nFrequency of: $aa_name[0]\nRange: $lower_range[0] - $upper_range[0]\nSequence Length: $seq_length\n\n";
					print FREQ "Location     Frequency\n";
				}

				$output_count = 0;
			}
		
			if ($freq_flagged[$r] == 1)
			{
				print FREQ "   $pdb_nums[$r]        ";
				if ($pdb_nums[$r-1] < 10)
					{print FREQ "    ";}
				if ($pdb_nums[$r-1] < 100 && $pdb_nums[$r-1] >= 10)
					{print FREQ "   ";}
				if ($pdb_nums[$r-1] < 1000 && $pdb_nums[$r-1] >= 100)
					{print FREQ "  ";}
				print FREQ "$freq_nums[$r]\n";
				$output_count++;
			}
		}
		
		print LOG "$output_name_total.frequency\n";
		close FREQ;
	}
	
	close LOG;
	
if ($menu_selection == 21)
		{
		my $acc_name;
		my $acc_file;
		my @acc_list;
		my $position =0;
		my $acc_num;
						
		print "\nPlease Enter name of ACC\n>";
		$acc_name = <STDIN>;
		chomp $acc_name;
		$acc_file = `cat $work_dir_name/$acc_name`;				
		@acc_list = split(/\n/,$acc_file);
		$acc_num = @acc_list;
		
		
		open ACC_RE, ">$work_dir_name/out.align";
		for(my $ac=0;$ac<$acc_num;$ac++)
			{
			$position=0;
			foreach(@work_names)											
				{
				if($_ =~ /$acc_list[$ac]/)
					{
					print ACC_RE "> $_\n$work_lines[$position]\n";
					
					}
					
							
				$position++;
				}
			}
		close ACC_RE;
		print "\n.... Generated file called: out.align\n";
	
		&end_menu_function(1);	
		}
	
	
	
	
	
	
	
	
	
	
	if ($menu_selection != 19)
		{&end_menu_function(1);}
}


###########################################################################
#	Comparing Two Tables
###########################################################################

sub option4
{
	# Compare two frequency tables and report every amino acid / position
	# whose percentage in the second ("after") table exceeds the one in the
	# first ("before") table by at least a user-supplied minimum.
	#
	# Everything is interactive: the two *.table file names (relative to
	# $work_dir_name) and the minimum difference (0-100) are read from STDIN.
	# The report is written to "<first>_vs_<second>.comparison" in
	# $work_dir_name with 54 result rows per page, then the end menu is shown.
	#
	# On invalid input (bad file name, unreadable/empty file, different PDB
	# prefixes, threshold out of range) or if the report cannot be created,
	# the user is sent back to the main menu and the program dies once that
	# returns -- the same bail-out the rest of this sub has always used.
	#
	# Takes no arguments.  Uses the file-level $work_dir_name and $date.

	my @aacid = (" ","V","L","I","M","F","W","Y","G","A","P","S","T","C","H","R","K","Q","E","N","D");
	my $rows_per_page = 54;
	my $invalid_file = "\n*** Invalid File ***\n\n";

	my $table_A_name;
	my $table_B_name;
	my $table_A_file;
	my $table_B_file;
	my $min_difference;

	# Reads a whole file without going through the shell (the name comes
	# from the user).  Returns the content, or undef if it cannot be read.
	my $slurp = sub
	{
		my ($path) = @_;
		my $fh;
		return if -d $path;
		open($fh, '<', $path) or return;
		local $/;
		my $text = <$fh>;
		close $fh;
		return $text;
	};

	# Prompts for one table.  Returns (name, content), or an empty list when
	# the name does not contain ".table" or the file is unreadable/empty.
	my $request_table = sub
	{
		my ($prompt) = @_;
		print $prompt;
		my $name = <STDIN>;
		$name = "" unless defined $name;
		chomp $name;
		$name =~ s/^\s+//;
		$name =~ s/\s+$//;
		return unless $name =~ /\.table/;
		my $text = $slurp->("$work_dir_name/$name");
		return unless $text;
		return ($name, $text);
	};

	# Runs the whole dialogue in its original order.  Returns undef when all
	# answers are usable, otherwise the message describing the first problem.
	my $collect_input = sub
	{
		($table_A_name, $table_A_file) = $request_table->("\nPlease enter first table\n>");
		return $invalid_file unless defined $table_A_name;

		($table_B_name, $table_B_file) = $request_table->("\nPlease enter second table\n>");
		return $invalid_file unless defined $table_B_name;

		# The PDB identification is the part of the name before the first
		# "_" or ".".  Compare it as a plain string: using it as a regex
		# accepted mere substrings and broke on metacharacters.
		my @valid_A = split(/\_|\./, $table_A_name);
		my @valid_B = split(/\_|\./, $table_B_name);
		my $pdb_A = defined $valid_A[0] ? $valid_A[0] : "";
		my $pdb_B = defined $valid_B[0] ? $valid_B[0] : "";
		if ($pdb_A ne $pdb_B)
		{
			return "\n*** Invalid! Tables must have the same PDB identification ***\n\n";
		}

		print "\nPlease enter minimum difference\n>";
		$min_difference = <STDIN>;
		$min_difference = "" unless defined $min_difference;
		chomp $min_difference;
		if ($min_difference < 0 || $min_difference > 100)
		{
			return "\n*** Invalid Threshold ***\n\n";
		}

		return;
	};

	# Extracts the data rows of a table.  Only text between a "Total" column
	# header and the next "Date" page header is considered, and of that only
	# lines containing a digit.  Each row is split on whitespace into
	# position, the percentage columns (stored from index 1) and a final
	# total column.  With the 20 percentage columns that the comparison below
	# expects, the total is field 22 of the split line.
	my $parse_table = sub
	{
		my ($text) = @_;
		my @rows;
		my @chunks = split(/Total/, $text);
		shift @chunks;

		foreach my $chunk (@chunks)
		{
			my @parts = split(/Date/, $chunk);
			next unless defined $parts[0];

			foreach my $line (split(/\n/, $parts[0]))
			{
				next unless $line =~ /\d+/;

				# two leading blanks guarantee an empty field 0
				my @fields = split(/\s+/, "  $line");
				my $total = $fields[22];
				pop @fields;
				my $position = $fields[1];
				shift @fields;
				shift @fields;

				push @rows, { position => $position, total => $total, percents => [undef, @fields] };
			}
		}
		return @rows;
	};

	my $problem = $collect_input->();
	my $out_file_name;
	my $com;

	if (!defined $problem)
	{
		my @table_A_id = split(/\.table/, $table_A_name);
		my @table_B_id = split(/\.table/, $table_B_name);
		$out_file_name = "$table_A_id[0]_vs_$table_B_id[0].comparison";

		open($com, '>', "$work_dir_name/$out_file_name")
			or $problem = "\n*** Cannot create $out_file_name: $! ***\n\n";
	}

	if (defined $problem)
	{
		print $problem;
		print "\n> Press <ENTER>";
		<STDIN>;
		&main;
		die "Thanx for using this great program:-)\n\nThat's your fault ---> Program died";
	}

	my $output_name_total = "$table_A_name compared to $table_B_name";

	my @rows_A = $parse_table->($table_A_file);
	my @rows_B = $parse_table->($table_B_file);

	# The first table decides how many positions are compared.
	my $seq_length = @rows_A;

	# Collect the qualifying (position, amino acid) pairs once, so that the
	# page total and the printed rows cannot disagree.
	my @hits;
	for (my $r = 0; $r < $seq_length; $r++)
	{
		for (my $c = 1; $c <= 20; $c++)
		{
			my $before = $rows_A[$r]{percents}[$c];
			my $after = $rows_B[$r]{percents}[$c];

			if (($after - $before) >= $min_difference)
			{
				push @hits, [$r, $c, $before, $after];
			}
		}
	}

	my $page_num = 1;
	my $page_num_total = ceil(scalar(@hits) / $rows_per_page);
	$page_num_total = 1 if $page_num_total < 1;	# an empty report is still one page

	# Same header on every page (the continuation pages used to have a
	# slightly different column spacing).
	my $print_header = sub
	{
		print $com "Date: $date                                            Page: $page_num / $page_num_total \n";
		print $com "PDB ID: $output_name_total\nSequence Length: $seq_length\nMinimum Difference: $min_difference%\n\n";
		print $com " AA\tPosition          Before(%)       After(%)    Difference(%)     Total\n";
	};

	$print_header->();

	my $output_count = 0;
	foreach my $hit (@hits)
	{
		my ($r, $c, $before, $after) = @$hit;
		my $position = $rows_A[$r]{position};

		# Start a new page only when another row actually follows; breaking
		# after the last row produced a stray "Page n+1 / n" header.
		if ($output_count == $rows_per_page)
		{
			$page_num++;
			$print_header->();
			$output_count = 0;
		}

		print $com " $aacid[$c]         $position";

		# pad the position so that the numeric columns line up
		if ($position < 10)
			{print $com "    ";}
		if ($position < 100 && $position >= 10)
			{print $com "   ";}
		if ($position < 1000 && $position >= 100)
			{print $com "  ";}

		printf $com "%15.0f", $before;
		printf $com "%15.0f", $after;
		printf $com "%15.0f", ($after - $before);
		printf $com "%15.0f", $rows_B[$r]{total};
		print $com "\n";

		$output_count++;
	}

	close $com;
	print "\nGenerating Comparison Output... ($out_file_name)\n";
	&end_menu_function(3);
}



###########################################################################
#	Comparing Multiple Tables (Fingerprints)
###########################################################################


sub option7
{
	# Compare several frequency tables ("fingerprints").
	#
	# Input
	#   $work_dir_name/table_file.txt  one "table_name,threshold" per line
	#   STDIN                          comparison threshold, summary threshold
	#                                  and, later, the minimum number of
	#                                  variations for the summary
	# Output (all in $work_dir_name)
	#   fingerprint_comparison.txt     selected residues with their percentage
	#                                  in every table
	#   position_table.txt             positions grouped by number of
	#                                  variations, plus full percentage rows
	#   position_table_summary.txt     per-position summary and counts per
	#                                  10%-bucket (50-60 ... 90-100)
	#
	# A residue column is selected for a position when its percentage in any
	# table reaches the comparison threshold while its percentage in the
	# FIRST table does not exceed the first table's threshold.
	#
	# Takes no arguments and returns nothing useful.  If the table list, one
	# of the tables or an output file cannot be opened, a message is printed
	# and the sub returns early.

	my @aacid = (" ","V","L","I","M","F","W","Y","G","A","P","S","T","C","H","R","K","Q","E","N","D");

	# Percentage buckets counted in the summary: lower bound exclusive,
	# upper bound inclusive.
	my @buckets = ([50, 60], [60, 70], [70, 80], [80, 90], [90, 100]);

	# Reads a whole file without going through the shell.  Returns the
	# content, or undef if the file cannot be read.
	my $slurp = sub
	{
		my ($path) = @_;
		my $fh;
		return if -d $path;
		open($fh, '<', $path) or return;
		local $/;
		my $text = <$fh>;
		close $fh;
		return defined $text ? $text : "";
	};

	# Returns the data lines of a table: only text between a "Total" column
	# header and the next "Date" page header is considered, and of that only
	# lines containing a digit.
	my $table_rows = sub
	{
		my ($text) = @_;
		my @rows;
		my @chunks = split(/Total/, $text);
		shift @chunks;

		foreach my $chunk (@chunks)
		{
			my @parts = split(/Date/, $chunk);
			next unless defined $parts[0];
			push @rows, grep { /\d+/ } split(/\n/, $parts[0]);
		}
		return @rows;
	};

	my $list_path = "$work_dir_name/table_file.txt";
	my $name_table = $slurp->($list_path);
	if (!defined $name_table)
	{
		print "\n*** cant open $list_path ***\n\n";
		return;
	}

	my @tables = split(/\n/, $name_table);
	my $num_tables = @tables;

	print "\n\nPlease enter comparison threshold:\n>";
	my $comparison_threshold = <STDIN>;
	$comparison_threshold = "" unless defined $comparison_threshold;
	chomp $comparison_threshold;

	print "\n\nPlease enter table summary threshold (>51):\n>";
	my $summary_threshold = <STDIN>;
	$summary_threshold = "" unless defined $summary_threshold;
	chomp $summary_threshold;

	print "\n\nNumber of tables: $num_tables\n";

	my $aus;
	if (!open($aus, '>', "$work_dir_name/fingerprint_comparison.txt"))
	{
		print "\n*** cant open $work_dir_name/fingerprint_comparison.txt: $! ***\n\n";
		return;
	}

	#
	# Read every table into $percents[row][residue column 1..20][table].
	#
	my @thres_array;
	my @pdb_index;
	my @percents;
	my $seq_length = 0;
	my $index = 0;

	foreach my $entry (@tables)
	{
		my @input = split(/\,/, $entry);
		push @thres_array, $input[1];

		print "\n$input[0]\n";
		my $index_plus_one = $index + 1;

		print $aus "($index_plus_one)  $input[0]\n";
		print $aus "\t Threshold: $input[1]\n";

		# The old `cat ... or "cant open"` never reported anything and the
		# comparison silently went on with an empty table.
		my $table_file = $slurp->("$work_dir_name/$input[0]");
		if (!defined $table_file)
		{
			print "\n*** cant open $input[0] ***\n\n";
			close $aus;
			return;
		}

		my $r = 0;
		foreach my $row ($table_rows->($table_file))
		{
			# two leading blanks guarantee an empty field 0; the last field
			# (the total column) is dropped
			my @fields = split(/\s+/, "  $row");
			pop @fields;

			# positions are taken from the first table only
			if ($index == 0)
			{
				push @pdb_index, $fields[1];
			}

			shift @fields;
			shift @fields;

			my $c = 1;
			foreach my $value (@fields)
			{
				$percents[$r][$c][$index] = $value;
				$c++;
			}

			$r++;
		}

		# as before, the row count of the table read last wins
		$seq_length = $r;
		$index++;
	}

	#
	# Select the residue columns of interest for every position, in the
	# order in which they are found (table by table, column by column).
	#
	my @residues;
	my @variation_count;
	for (my $r = 0; $r < $seq_length; $r++)
	{
		$residues[$r] = [];
		$variation_count[$r] = 0;
	}

	for ($index = 0; $index < $num_tables; $index++)
	{
		for (my $r = 0; $r < $seq_length; $r++)
		{
			for (my $c = 1; $c <= 20; $c++)
			{
				next unless $percents[$r][$c][$index] >= $comparison_threshold && $percents[$r][$c][0] <= $thres_array[0];
				push @{ $residues[$r] }, $c unless grep { $_ == $c } @{ $residues[$r] };
			}
		}
	}

	print $aus "\n\n";

	# One line per selected residue: position, amino acid, then the
	# percentage in every table.  Positions with several residues are set
	# off by an empty line before and a line break after the group.
	for (my $r = 0; $r < $seq_length; $r++)
	{
		my @selected = @{ $residues[$r] };
		next unless @selected && $num_tables > 0;

		print $aus "\n" if @selected > 1;

		foreach my $residue (@selected)
		{
			print $aus "\n";
			printf $aus "%4s", $pdb_index[$r];
			printf $aus "%2s", $aacid[$residue];
			$variation_count[$r]++;

			for (my $table = 0; $table < $num_tables; $table++)
			{
				printf $aus "%10s", $percents[$r][$residue][$table];
			}
		}

		print $aus "\n" if @selected > 1;
	}

	my $ex;
	my $sum;
	if (!open($ex, '>', "$work_dir_name/position_table.txt"))
	{
		print "\n*** cant open $work_dir_name/position_table.txt: $! ***\n\n";
		close $aus;
		return;
	}
	if (!open($sum, '>', "$work_dir_name/position_table_summary.txt"))
	{
		print "\n*** cant open $work_dir_name/position_table_summary.txt: $! ***\n\n";
		close $ex;
		close $aus;
		return;
	}

	#
	# List the positions grouped by their number of variations, most varied
	# first, both in the position table and on screen.
	#
	my $best_variation = 0;
	for (my $r = 0; $r < $seq_length; $r++)
	{
		if ($variation_count[$r] > $best_variation)
			{$best_variation = $variation_count[$r];}
	}

	for (my $count = $best_variation; $count >= 0; $count--)
	{
		print $ex "\n\n\nNumber of variations: $count\n-----------------------------------------";
		print "\n\n\nNumber of variations: $count\n-----------------------------------------";

		for (my $r = 0; $r < $seq_length; $r++)
		{
			if ($count == $variation_count[$r])
			{
				print $ex "\n";
				printf $ex "%4s", $pdb_index[$r];

				print "\n";
				printf "%4s", $pdb_index[$r];
			}
		}
	}

	print $ex "\n\n";

	print "\n\n\nPlease select the number of variations to be considered for ";
	print "\nthe position summary table:\n>";
	my $variation_threshold = <STDIN>;
	$variation_threshold = "" unless defined $variation_threshold;
	chomp $variation_threshold;

	# Keep the qualifying ROWS.  Looking the rows up again by a numeric
	# comparison of the position label repeated rows whenever two labels
	# were numerically equal.
	my @output_rows = grep { $variation_threshold <= $variation_count[$_] } 0 .. $seq_length - 1;

	#
	# For every summarised position and table remember the (last) percentage
	# that reaches the summary threshold, 0 if there is none.
	#
	my @subvals;
	foreach my $row (@output_rows)
	{
		my @per_table;
		for (my $table = 0; $table < $num_tables; $table++)
		{
			my $value = 0;
			for (my $c = 1; $c <= 20; $c++)
			{
				if ($percents[$row][$c][$table] >= $summary_threshold)
				{
					$value = $percents[$row][$c][$table];
				}
			}
			push @per_table, $value;
		}
		push @subvals, \@per_table;
	}

	#
	# Count the bucket hits per position and per table.  Both sets of
	# counters start at 0 over their own index range; the per-table counters
	# used to be initialised per position, which left blanks in the summary
	# for tables without hits.
	#
	my @count_plus;		# $count_plus[bucket][position]
	my @count_plus_group;	# $count_plus_group[bucket][table]
	for (my $b = 0; $b < @buckets; $b++)
	{
		$count_plus[$b] = [ (0) x scalar(@output_rows) ];
		$count_plus_group[$b] = [ (0) x $num_tables ];
	}

	for (my $p = 0; $p < @output_rows; $p++)
	{
		for (my $table = 0; $table < $num_tables; $table++)
		{
			for (my $b = 0; $b < @buckets; $b++)
			{
				my ($low, $high) = @{ $buckets[$b] };
				if ($subvals[$p][$table] > $low && $subvals[$p][$table] <= $high)
				{
					$count_plus[$b][$p]++;
					$count_plus_group[$b][$table]++;
				}
			}
		}
	}

	#
	# Full percentage rows go to the position table, the condensed view to
	# the summary.
	#
	my $p = 0;
	foreach my $row (@output_rows)
	{
		print $ex "\nPosition: $pdb_index[$row]\n\n";
		printf $sum "%3s", $pdb_index[$row];
		printf $sum "%3s", $variation_count[$row];

		# heading line with the amino acid letters
		for (my $c = 1; $c <= 20; $c++)
		{
			printf $ex "%4s", $aacid[$c];
		}
		print $ex "\n";

		for (my $table = 0; $table < $num_tables; $table++)
		{
			my $print_control = 0;

			for (my $c = 1; $c <= 20; $c++)
			{
				printf $ex "%4s", $percents[$row][$c][$table];

				if ($percents[$row][$c][$table] >= $summary_threshold)
				{
					my $table_entry = "$percents[$row][$c][$table] $aacid[$c]";
					printf $sum "%8s", $table_entry;
					$print_control++;
				}
			}
			print $ex "\n";

			# placeholder when no residue of this table reaches the threshold
			if ($print_control == 0)
			{
				printf $sum "%8s", "-";
			}
		}

		print $ex "\n\n";

		for (my $b = 0; $b < @buckets; $b++)
		{
			printf $sum "%8s", $count_plus[$b][$p];
		}
		print $sum "\n";

		$p++;
	}

	# one line of per-table counts for every bucket
	print $sum "\n      ";
	for (my $b = 0; $b < @buckets; $b++)
	{
		for (my $table = 0; $table < $num_tables; $table++)
		{
			printf $sum "%8s", $count_plus_group[$b][$table];
		}
		print $sum "\n      ";
	}

	close $ex;
	close $aus;
	close $sum;

	print "\n\n\n";
}


###########################################################################
#	To exclude a subset from workfile
###########################################################################

sub exclude
{
	# Remove the proteins of one workfile (the "subset") from another
	# workfile (the "original") and write what is left to a new workfile.
	#
	# All three file names are read from STDIN and are resolved relative to
	# $work_dir_name.  Takes no arguments and returns nothing useful; a
	# summary is printed to STDOUT.
	#
	# Workfile layout as read and written here:
	#   >name       protein name; the line that follows it is its sequence
	#   && number   one line per residue number
	#   << count    trailer with the number of proteins written
	#
	# If an input file cannot be read or the output file cannot be created,
	# a message is printed and the sub returns without writing anything.

	my @work_names_original;
	my @work_lines_original;
	my @work_names_subset;
	my @pdb_nums;
	my @flagged;
	my @temp;

	# Prints a prompt and returns the answer without its line ending
	# (chomp, so an answer that lacks the newline keeps its last character).
	my $ask = sub
	{
		my ($prompt) = @_;
		print $prompt;
		my $answer = <STDIN>;
		$answer = "" unless defined $answer;
		chomp $answer;
		return $answer;
	};

	# Reads a file without going through the shell (the names come from the
	# user).  Returns a reference to its chomped lines, or undef on failure.
	my $read_lines = sub
	{
		my ($path) = @_;
		my $fh;
		return if -d $path;
		open($fh, '<', $path) or return;
		my @lines = <$fh>;
		close $fh;
		chomp @lines;
		return \@lines;
	};

	my $original_name = $ask->("\nEnter original workfile\n>");
	my $subset_name = $ask->("\n\nEnter subset (workfile) to be removed\n>");
	my $output_name = $ask->("\n\nEnter output name for workfile\n>");

	my $lines_original = $read_lines->("$work_dir_name/$original_name");
	if (!defined $lines_original)
	{
		print "\n\n*** can't open $original_name ***\n\n";
		return;
	}

	my $lines_subset = $read_lines->("$work_dir_name/$subset_name");
	if (!defined $lines_subset)
	{
		print "\n\n*** can't open $subset_name ***\n\n";
		return;
	}

	#
	# Original: a ">" line names a protein and the line after it is stored as
	# its sequence; "&& " lines carry the residue numbers.
	#
	my $num_proteins = 0;
	my $read_control = 0;

	foreach my $line (@$lines_original)
	{
		if ($read_control == 1)
		{
			push @work_lines_original, $line;
			$num_proteins++;
			$read_control = 0;
		}
		if ($line =~ />/)
		{
			@temp = split (/\>/, $line);
			push @work_names_original, $temp[1];
			$read_control++;
		}
		if ($line =~ /&&/)
		{
			@temp = split (/\&\& /, $line);
			push @pdb_nums, $temp[1];
		}
	}

	#
	# Subset: only the names matter.
	#
	my $total_subset = 0;
	my %in_subset;

	foreach my $line (@$lines_subset)
	{
		if ($line =~ />/)
		{
			@temp = split (/\>/, $line);
			push @work_names_subset, $temp[1];
			$in_subset{ defined $temp[1] ? $temp[1] : "" } = 1;
			$total_subset++;
		}
	}

	# NOTE(review): the reported subset size is one less than the number of
	# ">" lines found; kept from the original code -- confirm the intent.
	$total_subset--;

	#
	# Flag what stays.  Every removed protein is counted exactly once, so the
	# "<< count" trailer always equals the number of proteins written, even
	# if the subset names a protein more than once.
	#
	my $same = 0;

	for (my $r = 0; $r < $num_proteins; $r++)
	{
		my $name = defined $work_names_original[$r] ? $work_names_original[$r] : "";

		if ($in_subset{$name})
		{
			$flagged[$r] = 0;
			$same++;
		}
		else
		{
			$flagged[$r] = 1;
		}
	}

	my $total_count = $num_proteins - $same;

	my $work;
	if (!open($work, '>', "$work_dir_name/$output_name"))
	{
		print "\n\n*** can't create $output_name: $! ***\n\n";
		return;
	}

	for (my $r = 0; $r < $num_proteins; $r++)
	{
		if ($flagged[$r] == 1)
		{
			print $work ">$work_names_original[$r]\n";
			print $work "$work_lines_original[$r]\n";
		}
	}
	print $work "\n";

	foreach my $pdb_num (@pdb_nums)
	{
		print $work "&& $pdb_num\n";
	}

	print $work "\n";
	print $work "<< $total_count";

	close $work;

	print "\n\nNumber of proteins in original: $num_proteins";
	print "\nNumber of proteins in subset to be removed: $total_subset";
	print "\nNumber of proteins in new workfile: $total_count";
	print "\n\nNumber of proteins removed: $same\n\n";
	print "\nGenerating new workfile: $output_name\n\n\n";
}

sub option8
{
	# Convert a ClustalW alignment (.aln) into a workfile.
	#
	# The path of the .aln file is read from STDIN (used as entered, not
	# relative to $work_dir_name).  The workfile is written next to it: the
	# file name is cut at its first dot and ".workfile" is appended.
	# Takes no arguments; finishes by calling go_back.
	#
	# How the alignment is read:
	#   * the first three lines (ClustalW header) are skipped;
	#   * every non-empty line without ':', '.' or '*' is a sequence line
	#     "name  residues"; any other line ends the current block;
	#   * the accession is the second '|'-separated field of the name;
	#   * the residues of later blocks are appended in block order.
	#
	# In the workfile leading gaps become blanks and all other gaps dots.
	#
	# NOTE(review): $n_p, which is reported on screen and written to the
	# "<<" trailer, is "sequence lines in the last block minus one" whenever
	# a block separator was seen.  That rule is kept unchanged -- confirm
	# whether the trailer is really meant to be one less than the number of
	# sequences written.

	my $number_proteins = 0;
	my $n_p = 0;
	my @seq = ();
	my @acc = ();

	print "\n\n\tInput aln file is clustalW with header and nothing different. Just ensure\n\t the file is not converted from binary.  test more **.aln.  If ^M present you need to fix.\n"; 
	print "\n\nPlease Enter .aln file from clustalW\n>";

	my $aln_file1 = <STDIN>;
	$aln_file1 = "" unless defined $aln_file1;
	chomp $aln_file1;

	# Read the file directly instead of through `cat`: the name comes from
	# the user, and a missing file used to yield an empty workfile silently.
	my $in;
	if ($aln_file1 eq "" || -d $aln_file1 || !open($in, '<', $aln_file1))
	{
		print "\n*** Cannot read alignment file '$aln_file1' ***\n\n";
		&go_back;
		return;
	}
	my $aln_file = do { local $/; <$in> };
	$aln_file = "" unless defined $aln_file;
	close $in;

	# echo the alignment to the screen
	print $aln_file;

	my @aln_lines = split(/\n/, $aln_file);
	splice(@aln_lines, 0, 3);	# drop the ClustalW header lines

	######### split the alignment into its blocks
	foreach my $line (@aln_lines)
	{
		if ($line && $line !~ /[:.*]/)
		{
			my @get_seq = split(/\s+/, $line);
			my @acc_hold = split(/\|/, $get_seq[0]);

			if (!($n_p))
			{
				# first block: register the protein
				push @acc, $acc_hold[1];
				push @seq, $get_seq[1];
			}
			else
			{
				# later blocks: extend the sequence at the same rank
				$seq[$number_proteins] = $seq[$number_proteins] . $get_seq[1];
			}
			$number_proteins++;
		}
		else
		{
			$n_p = $number_proteins - 1;
			$number_proteins = 0;
		}
	}

	# alignment length = length of the first sequence
	my $length = defined $seq[0] ? length($seq[0]) : 0;

	print "\n\n Number of proteins = $n_p\n Length of Proteins = $length\n Please Wait: ";

	############################ printing out the workfile, from the clustal alignment

	# Cut the FILE NAME (not the whole path) at its first dot, so that dots
	# in directory names such as "./" no longer swallow the name.
	my ($out_dir, $out_base) = $aln_file1 =~ m{^(.*/)?([^/]*)$};
	$out_dir = "" unless defined $out_dir;
	$out_base = "" unless defined $out_base;
	my @nameing_the_workfile = split(/\./, $out_base);
	my $stem = defined $nameing_the_workfile[0] ? $nameing_the_workfile[0] : "";
	my $out_name = "$out_dir$stem.workfile";

	my $out;
	if (!open($out, '>', $out_name))
	{
		print "\n*** Cannot create $out_name: $! ***\n\n";
		&go_back;
		return;
	}

	my $rank = 0;
	foreach my $accession (@acc)
	{
		print $out "> [$accession]\n";

		my $residues = defined $seq[$rank] ? $seq[$rank] : "";

		# gaps before the first residue become blanks, every other gap a dot
		$residues =~ s/^(-+)/" " x length($1)/e;
		$residues =~ tr/-/./;
		print $out $residues;

		$rank++;
		print $out "\n";
		print ".";	# progress indicator on screen
	}

	print $out "\n";
	for (my $column = 1; $column <= $length; $column++)
	{
		print $out "&& $column\n";
	}
	print $out "\n<< $n_p";

	close $out;
	print "\n\nNote:  The current work file is not loaded\n  Output is named >>>> $out_name\n\n";
	&go_back;
}


###########################################################################
#	End Menu Function
###########################################################################


# Closing menu shown after an option has finished.
#
# Argument (one value, compared numerically):
#   1     - full menu: view or print a generated file, return to the main
#           menu (keeping or dropping the current workfile), or exit
#   2     - short menu after a workfile was created: main menu or exit
#   other - short menu: main menu or exit
#
# An invalid answer shows an error, waits for <ENTER> and shows the same
# menu again.  "Exit" only prints the farewell (and, in the full menu,
# deletes temp.log); it returns to the caller rather than calling exit.
# Reads the globals $work_dir_name, $dir_name and $menu_selection.
sub end_menu_function
{
	my $yes_no = shift();
	my $choice;

	if ($yes_no == 1)
	{
		print "\n\n(1) To view the generated file(s)\n";
		print "(2) To print the generated file(s)\n";
		print "(3) To return to the MAIN MENU and continue work with CURRENT workfile\n";
		print "(4) To return to the MAIN MENU\n";
		print "(5) To EXIT the program\n";
		print "\n\nPlease select option \n>";
		$choice = _end_menu_read_line();

		if ($choice eq "1")
		{
			_end_menu_pick_generated_file("open");
			&end_menu_function(1);
		}
		elsif ($choice eq "2")
		{
			_end_menu_pick_generated_file("print");
			&end_menu_function(1);
		}
		elsif ($choice eq "3")
		{
			&main;
		}
		elsif ($choice eq "4")
		{
			# mark the log before leaving the working directory
			if (open(my $log_fh, '>>', "$work_dir_name/temp.log"))
			{
				print $log_fh "donot\n";
				close $log_fh;
			}
			$work_dir_name = $dir_name;
			&main;
		}
		elsif ($choice eq "5")
		{
			print "\nThanx for using this great program!!! \nEXITING...\n\n";
			unlink("$work_dir_name/temp.log");
		}
		else
		{
			# wait for <ENTER> like every other invalid-choice branch does,
			# otherwise the keypress is consumed as the next menu answer
			print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>"; <STDIN>;
			&end_menu_function(1);
		}
	}
	elsif ($yes_no == 2)
	{
		print "\n\n(1) To return to the MAIN MENU and work with just created workfile\n";
		print "(2) To EXIT the program\n";
		print "\n\nPlease select option \n>";
		$choice = _end_menu_read_line();

		if ($choice eq "1")
		{
			&main;
		}
		elsif ($choice eq "2")
		{
			print "\nThanx for using this great program!!! \nEXITING...\n\n";
		}
		else
		{
			print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>"; <STDIN>;
			&end_menu_function(2);
		}
	}
	else
	{
		print "\n\n(1) To return to the MAIN MENU\n";
		print "(2) To EXIT the program\n";
		print "\n\nPlease select option \n>";
		$choice = _end_menu_read_line();

		if ($choice eq "1")
		{
			&main;
		}
		elsif ($choice eq "2")
		{
			print "\nThanx for using this great program!!! \nEXITING...\n\n";
		}
		else
		{
			print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>"; <STDIN>;
			&end_menu_function(3);
		}
	}
}

# Read one line from STDIN without its line ending ("" at end of input).
sub _end_menu_read_line
{
	my $line = <STDIN>;
	return "" unless defined $line;
	chomp $line;
	return $line;
}

# Return the lines of $work_dir_name/temp.log that do not mention "workfile".
sub _end_menu_generated_files
{
	my $log_path = "$work_dir_name/temp.log";
	my $content = "";

	if (open(my $log_fh, '<', $log_path))
	{
		local $/;
		$content = <$log_fh>;
		close $log_fh;
		$content = "" unless defined $content;
	}
	else
	{
		warn "Cannot read $log_path: $!\n";
	}

	return grep { !/workfile/ } split(/\n/, $content);
}

# List the generated files, ask for one by number and open or print it.
# $action is "open" or "print" (it is also the verb shown in the prompt).
# "B" goes back without doing anything; anything else that is not a listed
# number is reported as an invalid choice.
sub _end_menu_pick_generated_file
{
	my ($action) = @_;
	my @files = _end_menu_generated_files();
	my $next_number = 1;

	print "\n";
	foreach my $file (@files)
	{
		print "($next_number) $file\n";
		$next_number++;
	}

	print "\nTo $action the file press the according number or Press (B) to go back\n>";
	my $answer = uc(_end_menu_read_line());
	my $looks_valid = ($answer =~ /B/ || $answer =~ /\d/);

	if ($looks_valid && $answer >= 1 && $answer < $next_number)
	{
		my $path = "$work_dir_name/$files[$answer - 1]";

		# NOTE(review): the external commands are still run through
		# backticks so that their output stays hidden, as before; the path
		# is therefore seen by a shell.
		if ($action eq "print")
		{
			if ($menu_selection == 11 || $menu_selection == 12)
			{
				`lp -dlj2 -o"fontsize 6" $path`;
			}
			else
			{
				`lp -dlj2 $path`;
			}
		}
		else
		{
			`j $path`;
		}
	}
	elsif (!$looks_valid || $answer ne "B")
	{
		print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>"; <STDIN>;
	}
}


###########################################################################
#	Help Functions
###########################################################################

# Top-level help menu.
#
# Shows three entries and dispatches on the answer read from STDIN:
# "1" -> general_info, "2" -> main_menu_description, "3" -> main.
# Any other answer prints an error, waits for <ENTER> and shows the menu
# again.  Takes no arguments.
sub help_menu
{
	print `clear`;
	print "\n    #####   HELP MENU   #####\n\n";
	print "(1) General Information\n";
	print "(2) Description of the Main Menu Functions\n";
	print "(3) Back to Main Menu\n";
	print "\n\nPlease select option\n>";

	my $selection = <STDIN>;
	$selection = "" unless defined $selection;
	# chomp, not chop: only a real line ending may be removed, otherwise an
	# answer that ends at end-of-input would lose its last character
	chomp $selection;

	if ($selection eq "1")
	{
		&general_info;
	}
	elsif ($selection eq "2")
	{
		&main_menu_description;
	}
	elsif ($selection eq "3")
	{
		&main;
	}
	else
	{
		print "\n*** Invalid Choice ***\n\n";
		print "\n> Press <ENTER>"; <STDIN>;
		&help_menu;
	}
}

# "General Information" help page.
#
# Asks which of two texts to show ("1" program description, "2" required
# modules), prints it on a cleared screen and hands over to go_back.
# Any other answer prints an error, waits for <ENTER> and asks again.
# Takes no arguments.
sub general_info
{
	print `clear`;
	print "\n(1) Information about the Program\n";
	print "(2) Required Modules\n";
	print "\n\nPlease select option\n>";

	my $selection = <STDIN>;
	$selection = "" unless defined $selection;
	# chomp, not chop: only a real line ending may be removed, otherwise an
	# answer that ends at end-of-input would lose its last character
	chomp $selection;

	if ($selection eq "1")
	{
		print `clear`;
		print join('',
			"\n\tThe covariance program is a computational tool used for substrate",
			"\nand cofactor prediction. The program is based on the output of the HSSP",
			"\n(homology-derived secondary structure of proteins).",
			"\nThe HSSP aligns proteins which have (currently) a 30% homology or better,",
			"\ncompared to the parent structure. Since proteins with an identity of",
			"\n25% or better are very likely to fold the same way, the HSSP output",
			"\nprovides valuable information for folding.",
			"\nThe covariance program allows the user to produce a table, protein",
			"\nname list, fingerprint and/or an output of the species that code for",
			"\nthe proteins. The table shows the percentual distribution of all amino",
			"\nacids for each position and the number of proteins involved.",
			"\nThe fingerprint output contains a list of all residues that possess",
			"\na high percentual distribution. The threshold is entered by the user.",
			"\nThere are two options to generate the protein name list. Option one",
			"\nproduces a list of the proteins with the accession numbers, while option",
			"\ntwo also includes the sequence for each protein.",
			"\nThe species output is a list that contains genus and species of the",
			"\nprotein's origin. The entries are sorted by the `top level of taxonomy",
			"\ntree hierarchy` according to NCBI (Eukaryota, Bacteria, Archaea...).",

			"\n\n\tOnce one knows that a particular residue is involved in substrate",
			"\nor cofactor binding, the program enables the user to isolate all proteins",
			"\nthat contain this specific amino acid. There are several other options",
			"\navailable to the user in order to apply restrictions.",
			"\nProteins can be extracted by name, by excluding an amino acid at a",
			"\ncertain location, by excluding an amino acid entirely, or by the number",
			"\nof occurrences of a particular amino acid.",

			"\n\n\tAfter a new subset of proteins is obtained, the percentual",
			"\ndistribution of each amino acid is compared to the value before the",
			"\nextraction took place. If a particular amino acid shows a significant",
			"\nincrease in its percent value, the residue shows covarience.\n\n",
		);
		&go_back;
	}
	elsif ($selection eq "2")
	{
		print `clear`;
		print join('',
			"\nThis program requires two modules:",
			"\n - HTML::FormatText",
			"\n   (http://search.cpan.org/author/SBURKE/HTML-Format-2.03/lib/HTML/FormatText.pm)",
			"\n - HTML::TreeBuilder",
			"\n   (http://search.cpan.org/author/SBURKE/HTML-Tree-3.17/lib/HTML/TreeBuilder.pm)",
			"\nFor more informarion please contact the corresponding web site.\n\n",
		);
		&go_back;
	}
	else
	{
		print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>"; <STDIN>;
		&general_info;
	}
}

# "Description of the Main Menu Functions" help page.
#
# Lists the ten main-menu functions, reads a number from STDIN, prints the
# matching description on a cleared screen and hands over to go_back.
# For functions 4 to 8 the shared user_options text is printed after the
# description.  Any other answer prints an error, waits for <ENTER> and
# shows the list again.  Takes no arguments.
sub main_menu_description
{
	# answer => [ print user_options afterwards?, text pieces ... ]
	my %help_for = (
		"1" => [ 0,
			"\nThis function allows the user to view the main directory and the current",
			"\nworking directory. The user is forced to specify the working directory within",
			"\nthis function before any processing is done.",
			"\n\nNote: The working directory is automatically created when the user enters a new",
			"\n      PDB ID (Main Function (2))\n\n",
		],
		"2" => [ 0,
			"\nThis function will prompt the user to enter a valid PDB ID.",
			"\n\nA working directory is automatically created for the entered",
			"\nPDB ID, and all the files will be stored in this subdirectory. \n\nExample: 1HDC.dir\n",
			"\nThen the function  downloads the particular HSSP file and",
			"\nsaves it as <pdb_id.hssp>. \n\nExample: 1hdc.hssp\n",
			"\nAfter that the function extracts the necessary information from",
			"\nthe HSSP file such as aligned sequences, protein names, accession",
			"\nnumbers, and total number of proteins. The information is stored ",
			"\nas a workfile.\n\nExample: 1hdc.workfile\n",
			"\nThe workfile is required for all the remaining functions except",
			"\nfunction (10).",
			"\n\nNote: If a PDB contains multiple chains for one entry, only the",
			"\n      first chain listed will be used.\n\n",
		],
		"3" => [ 0,
			"\nThis function prompts the user to enter a valid workfile.",
			"\nIs is used generate the following from any particular workfile:",
			"\n\n - Table with the percentual amino acid distribution",
			"\n - Protein name list, including accession numbers \n   (Output of the protein sequence is optional)",
			"\n - Fingerprint with conserved residues",
			"\n - Species output of the protein's origin",
			"\n\nFor further information see \n\n - `General Information`  -->  `Information about the Program`\n\n",
		],
		"4" => [ 1,
			"\nThis function prompts the user to enter a valid workfile.",
			"\nThen the user is asked to enter a amino acid (one letter code)",
			"\nand its location.",
			"\nThe function proceeds with the isolation of all proteins which",
			"\ncontain that specific residue at this particular position.",
		],
		"5" => [ 1,
			"\nThis function prompts the user to enter a valid workfile.",
			"\nThen the user is asked to enter a name of a protein",
			"\n(or a part of name).",
			"\nThe function proceeds with the isolation of all proteins that",
			"\nmatch with the entered name.",
		],
		"6" => [ 1,
			"\nThis function prompts the user to enter a valid workfile.",
			"\nThen the user is asked to enter a amino acid (one letter code)",
			"\nand its location.",
			"\nThe function proceeds with the isolation of all proteins which",
			"\ndo not contain that specific residue at this particular position.",
		],
		"7" => [ 1,
			"\nThis function prompts the user to enter a valid workfile.",
			"\nThen the user is asked to enter a amino acid (one letter code).",
			"\nThe function proceeds with the isolation of all proteins which",
			"\ndo not contain that specific residue at all.",
		],
		"8" => [ 1,
			"\nThis function prompts the user to enter a valid workfile.",
			"\nThen the user is asked to enter a amino acid (one letter code).",
			"\nThe user has the option tho choose a range or an occurrence",
			"\nThe function proceeds with the isolation of all proteins which",
			"\nhave the defined number of a particular residue.",
		],
		"9" => [ 0,
			"\nThis function prompts the user to enter a valid workfile.",
			"\nThen the user is asked to enter a amino acid (one letter code).",
			"\nThe user has the option to choose a range or an occurrence.",
			"\nThe function will produce a frequency output which shows the ",
			"\nnumber of proteins that have the defined residue in each position.",
			"\nThe range and threshold help to apply restrictions.\n\n",
		],
		"10" => [ 0,
			"\nThe program prompts the user to enter two table files (xxx.table).",
			"\nThe first table is the more original one. The second table",
			"\nis compared to the first one.",
			"\nThe output is a table that shows the level of covariance.\n\n",
		],
	);

	print `clear`;
	print "\n    #####   Description of the Main Menu Functions   #####\n\n";
	print "(1)  To specify the working directory\n";
	print "(2)  To create a workfile from HSSP (requires PDB ID)\n";
	print "(3)  To generate a table / fingerprint / name list / species output from workfile \n";
	print "(4)  To isolate a single AA at a certain location from workfile \n";
	print "(5)  To isolate proteins by name from workfile \n";
	print "(6)  To exclude a single AA at a certain location from workfile\n";
	print "(7)  To exculde an AA entirely from workfile\n";
	print "(8)  To isolate proteins with a certain number of one AA from workfile \n";
	print "(9)  To get an AA frequency from workfile\n";
	print "(10) To compare two tables\n";
	print "\n\nPlease select option \n>";

	my $selection = <STDIN>;
	$selection = "" unless defined $selection;
	# chomp, not chop: only a real line ending may be removed, otherwise an
	# answer that ends at end-of-input would lose its last character
	chomp $selection;

	if (exists $help_for{$selection})
	{
		my ($with_user_options, @text) = @{ $help_for{$selection} };

		print `clear`;
		print join('', @text);
		if ($with_user_options)
		{
			&user_options;
		}
		&go_back;
	}
	else
	{
		print "\n*** Invalid Selection ***\n\n"; print "\n> Press <ENTER>"; <STDIN>;
		&main_menu_description;
	}
}

# Navigation prompt shown at the end of a help page (and of option8).
#
# Reads an answer from STDIN: 1 -> help_menu, 2 -> main.  The answer is
# compared numerically, so text that merely starts with the number is
# accepted as well.  Anything else prints an error, waits for <ENTER> and
# asks again.  Takes no arguments.
sub go_back
{
	print "\n(1) To go back to the Help Menu\n";
	print "(2) To go back to the Main Menu\n";
	print "\n\nPlease select option\n>";

	my $selection = <STDIN>;
	$selection = "" unless defined $selection;
	# chomp, not chop: only a real line ending may be removed, otherwise an
	# answer that ends at end-of-input would lose its last character
	chomp $selection;

	if ($selection == 1)
	{
		&help_menu;
	}
	elsif ($selection == 2)
	{
		&main;
	}
	else
	{
		print "\n*** Invalid Choice ***\n\n"; print "\n> Press <ENTER>"; <STDIN>;
		&go_back;
	}
}

# Print the help text that lists the outputs available after a restriction
# has been applied.  Writes to STDOUT only; takes no arguments.
sub user_options
{
	my @paragraphs = (
		"\n\nThe user can choose several option after the restriction took place",
		"\nto generate the following output:",
		"\n\n - Table with the percentual amino acid distribution",
		"\n - Protein name list, including accession numbers \n   (Output of the protein sequence is optional)",
		"\n - Fingerprint with conserved residues",
		"\n - Species output of the protein's origin",
		"\n - A table that shows the level of covariance: \n   The program prompts the user to enter minimum difference",
		"\n   (Change in percentage for a particular residue)",
		"\n\nFor further information see \n\n - `General Information`  -->  `Information about the Program`\n\n",
	);

	foreach my $paragraph (@paragraphs)
	{
		print $paragraph;
	}
}






# Build a workfile from a protein search: fetch the hits of a fixed search
# URL, download the FASTA of every hit shorter than 500 residues, run
# clustalx, then turn the resulting fasta.aln into a workfile.
#
# NOTE(review): the search term, every file path and the clustalx location
# are hard-coded, and none of the open calls is checked.  `get` is
# presumably LWP::Simple::get imported elsewhere in the file - confirm.
# The HTML split patterns assume a particular page layout of the remote
# site; that layout cannot be verified from this code.
# Takes no arguments and returns without showing a menu.
sub option5
{
	my $search_name;
	my $search_output;
	my @accession_temp_1;
	my @accession_temp_2;
	my @accession;
	my @accession_OK;
	my $sequence;
	my $entry_info;
	my @seq_length_temp_1;
	my @seq_length_temp_2;
	my @seq_length;
	my @name_temp_1;
	my @name_temp_2;
	my @name_temp_3;
	my @protein_names;
	my $count;
	my $clustalw_file;
	my @clustalw_lines_temp;
	my @clustalw_lines;
	my $k;
	my $c;
	my $index;
	my $spacer;
	my $space_lines;
	my @seq_temp;
	my @clustalw_sequences;
	my @seq_id;
	my $temp_concat;
	my $length;
	my $overall_seq_length;
	
	# The interactive search prompt is disabled; the fixed URL below is used.
#	print "\n\nPlease enter search key words\n>";
#	$search_name = <STDIN>;
#	chop $search_name;

	### gluconate+AND+deoxy+AND+reductase



#	$search_output = get "http://us.expasy.org/cgi-bin/sprot-search-ful?SEARCH=$search_name&S=on&T=on";
	
	
	$search_output = get "http://us.expasy.org/cgi-bin/sprot-search-ful?SEARCH=7-alpha-hydroxysteroid+dehydrogenase&S=on&T=on";


	# Every chunk after a "pl?" starts with an accession that runs up to the
	# next double quote.  The text before the first "pl?" and the last
	# accession found are discarded.
	@accession_temp_1 = split(/pl\?/,$search_output);
	shift @accession_temp_1;
	foreach(@accession_temp_1)
	{
		@accession_temp_2 = split(/\"/,$_);
		push @accession, $accession_temp_2[0];
	}
	pop @accession;
		
	open FASTA, ">/home/habegger/COVAR/fasta.txt";
	
	# Fetch each entry page, keep the entries whose parsed length is between
	# 1 and 499, append their FASTA record and remember their protein name.
	$count = 0;
	foreach(@accession)
	{
		$entry_info = get "http://us.expasy.org/cgi-bin/niceprot.pl?$_";
		@seq_length_temp_1 = split(/\>Length\</,$entry_info);
		@seq_length_temp_2 = split(/\sAA/,$seq_length_temp_1[1]);
		@seq_length = split(/\<b\>/,$seq_length_temp_2[0]);
		
		if($seq_length[1] > 0 && $seq_length[1] < 500)
		{
			push @accession_OK, $_;
		
			$sequence = get "http://us.expasy.org/cgi-bin/get-sprot-fasta?$_";
			print FASTA $sequence;
		
			@name_temp_1 = split(/Protein name/,$entry_info);
			@name_temp_2 = split(/\<\/b\>/,$name_temp_1[2]);
			@name_temp_3 = split(/\<b\>/,$name_temp_2[0]);
			push @protein_names, $name_temp_3[1];
			
			print "\n$name_temp_3[1]   $seq_length[1]";
			
			$count++;
		}
	}

	print "\n\nNumber of hits: $count\n\n";

	close FASTA;
	
	# Run clustalx and wait for it to finish; its result is then read from
	# fasta.aln.  NOTE(review): presumably the user aligns fasta.txt in the
	# tool and saves fasta.aln by hand - confirm.
	`/home/habegger/COVAR/clustalx1.83.sgi/clustalx`;
	
	$clustalw_file = `cat /home/habegger/COVAR/fasta.aln`;
	@clustalw_lines_temp = split(/\n/,$clustalw_file);
	
	# drop the first three lines and the last line of the alignment file
	shift @clustalw_lines_temp;
	shift @clustalw_lines_temp;
	shift @clustalw_lines_temp;
	pop @clustalw_lines_temp;
	
	$k = 0;
	$index = 0;
	$space_lines = 2;
	
	# Keep $count consecutive lines, skip the next $space_lines lines, and
	# repeat: this collects the sequence rows of every alignment block.
	foreach(@clustalw_lines_temp)
	{
		if($k < $count)
		{
			$spacer = 0;
			$clustalw_lines[$index] = $_;
			$index++;
			$k++;
		}
		else
		{
			$spacer++;
			if ($spacer == $space_lines)
				{$k = 0;}
		}	
	}
	
	@clustalw_sequences = ();
	
	$k = 0;
	$index = 0;
	$temp_concat = "";	
	
	# Row i of every block belongs to sequence i: append the second field of
	# each kept line to sequence ($k % $count).  The ids (first field) are
	# taken from the first block only.
	foreach(@clustalw_lines)
	{
		@seq_temp = split(/\s+/,$_);
		$temp_concat = "$clustalw_sequences[$k % $count]$seq_temp[1]";
		$clustalw_sequences[$k % $count] = $temp_concat;
		if($k < $count)
			{$seq_id[$k % $count] = $seq_temp[0];}
		$k++;
	}
	
	# Replace each alignment id by " [accession]" (padded with blanks to 11
	# characters before the leading blank is added) followed by the protein
	# name.  The accession is used as a regular expression against the id,
	# and the entry in @accession_OK is overwritten with the padded form.
	foreach(@seq_id)
	{
		for($k = 0;$k < $count; $k++)
		{
			if($_ =~ $accession_OK[$k])
			{
				$accession_OK[$k] = "[$accession_OK[$k]]";
				$length = length($accession_OK[$k]);
				for ($c = $length ; $c < 11; $c++)
					{$accession_OK[$k] = "$accession_OK[$k] ";}
				$accession_OK[$k] = " $accession_OK[$k]";
				
				$_ = "$accession_OK[$k]$protein_names[$k]";
			}
		}
	}
	
	# the alignment length is taken from the first sequence
	$overall_seq_length = length($clustalw_sequences[0]);
	
	open TEST, ">/home/habegger/COVAR/1HDC.dir/out.workfile";
	
	# Workfile layout: ">label" and sequence line per protein, one "&& n"
	# line per alignment column, then "<< count".
	for($k = 0;$k < $count; $k++)
	{
		print TEST ">$seq_id[$k]\n$clustalw_sequences[$k]\n";
	}
	
	print TEST "\n\n";
	
	for($k = 1;$k <= $overall_seq_length; $k++)
	{
		print TEST "&& $k\n"
	}
	
	print TEST "\n\n";
	print TEST "<< $count";
	
	close TEST;

}


# Build a workfile from a BLAST result.
#
# Prompts on STDIN for a BLAST file (looked up under $dir_name) and for a
# minimum and maximum sequence length.  Accessions and E-values are taken
# from the lines that contain "tr!", "tn!" or "sp!" and no ">".  Every
# accession is looked up remotely; entries whose length lies within the
# requested range are listed on the terminal and their FASTA records are
# written to $dir_name/fasta.txt.  After clustalx has been run, the
# resulting $dir_name/fasta.aln is converted into a workfile and
# end_menu_function(2) is shown.
#
# NOTE(review): the clustalx location, the two debug files (test.id,
# match.txt) and the output workfile path are hard-coded.  `get` is
# presumably LWP::Simple::get imported elsewhere in the file - confirm.
# The HTML/text split patterns assume a particular layout of the remote
# pages that cannot be verified from this code.
# Takes no arguments.
sub option6
{
	my @blast_lines;
	my @temp_access_1;
	my @temp_access_2;
	my @blast_access;
	my @accession_OK;
	my $num_of_access;
	my $entry_info;
	my @seq_length_temp_1;
	my @seq_length_temp_2;
	my @seq_length;
	my $sequence;
	my @name_temp_1;
	my @name_temp_2;
	my @name_temp_3;
	my @protein_names;
	my $count;
	my $clustalw_file;
	my @clustalw_lines_temp;
	my @clustalw_lines;
	my $k;
	my $c;
	my $index;
	my $spacer;
	my $space_lines;
	my @seq_temp;
	my @clustalw_sequences;
	my @seq_id;
	my $temp_concat;
	my $length;
	my $overall_seq_length;
	my @evalues;
	my $eval;
	my $control;

	# chomp (not chop) so that only a real line ending is removed
	print "\nPlease enter BLAST file\n>";
	my $blast_name = <STDIN>;
	$blast_name = "" unless defined $blast_name;
	chomp $blast_name;
	print "\nPlease enter minumum sequence length\n>";
	my $min = <STDIN>;
	$min = "" unless defined $min;
	chomp $min;
	print "\nPlease enter maximum sequence length\n>";
	my $max = <STDIN>;
	$max = "" unless defined $max;
	chomp $max;

	my $id = "Accession #";
	my $protein_id = "Protein Name";
	my $eval_name = "E-value";
	my $length_name = "Length";

	# Read the BLAST file directly; the name is typed by the user and must
	# not be handed to a shell.  An unreadable file is reported and treated
	# as empty, so the flow continues as before.
	my $blast_path = "$dir_name/$blast_name";
	my $blast_file = "";
	if (open(my $blast_fh, '<', $blast_path))
	{
		local $/;
		$blast_file = <$blast_fh>;
		close $blast_fh;
		$blast_file = "" unless defined $blast_file;
	}
	else
	{
		print "\n*** Cannot read BLAST file $blast_path: $! ***\n";
	}

	@blast_lines = split(/\n/,$blast_file);
	$num_of_access = 0;

	# Collect accession and E-value (last whitespace-separated field) from
	# the hit lines; lines containing ">" are ignored.
	foreach(@blast_lines)
	{
		if (!($_ =~ />/))
		{
			if ($_ =~ /tr!/ || $_ =~ /tn!/)
			{
				@temp_access_1 = split(/\!/,$_);
				@temp_access_2 = split(/\s/,$temp_access_1[1]);
				push @blast_access, $temp_access_2[0];
				$num_of_access++;
				$eval = pop @temp_access_2;
				push @evalues, $eval;
			}

			if ($_ =~ /sp!/)
			{
				@temp_access_1 = split(/\!/,$_);
				push @blast_access, $temp_access_1[1];
				$num_of_access++;
				@temp_access_2 = split(/\s+/,$temp_access_1[2]);
				$eval = pop @temp_access_2;
				push @evalues, $eval;
			}
		}
	}

	print "\n\nNumber of Sequences obtained by BLAST: $num_of_access\n\n\n\n";

	open(FASTA, '>', "$dir_name/fasta.txt")
		or print "\n*** Cannot write $dir_name/fasta.txt: $! ***\n\n";

	$count = 0;      # entries that passed the length filter
	$control = 0;    # index into @evalues, parallel to @blast_access

	printf "%12s", $id;
	printf "%10s", $length_name;
	printf "%12s", $eval_name;
	print  "        $protein_id\n";
	print "------------------------------------------------------------------------------------------------\n\n";

	foreach(@blast_access)
	{
		$length = length($_);
		$entry_info = get "http://us.expasy.org/cgi-bin/niceprot.pl?$_";

		# Six-character accessions are parsed from the HTML entry page, all
		# others from the first line of the returned text.
		if ($length == 6)
		{
			@seq_length_temp_1 = split(/\>Length\</,$entry_info);
			@seq_length_temp_2 = split(/\sAA/,$seq_length_temp_1[1]);
			@seq_length = split(/\<b\>/,$seq_length_temp_2[0]);
		}
		else
		{
			@seq_length_temp_1 = split(/\n/,$entry_info);
			@seq_length_temp_2 = split(/\s+/,$seq_length_temp_1[0]);
			$seq_length[1] = $seq_length_temp_2[4];
		}

		if($seq_length[1] >= $min && $seq_length[1] <= $max)
		{
			my $protein_name;

			push @accession_OK, $_;

			$sequence = get "http://us.expasy.org/cgi-bin/get-sprot-fasta?$_";
			print FASTA $sequence;

			if ($length == 6)
			{
				@name_temp_1 = split(/Protein name/,$entry_info);
				@name_temp_2 = split(/\<\/b\>/,$name_temp_1[2]);
				@name_temp_3 = split(/\<b\>/,$name_temp_2[0]);
				$protein_name = $name_temp_3[1];
			}
			else
			{
				@name_temp_1 = split(/\s+/,$seq_length_temp_1[5]);
				$protein_name = $name_temp_1[1];
			}

			# List the name that was just parsed for this entry.  The
			# second branch used to print $name_temp_3[1], which is left
			# over from an earlier six-character entry (or empty).
			push @protein_names, $protein_name;
			printf "%12s", $_;
			printf "%10s", $seq_length[1];
			printf "%12s", $evalues[$control];
			print   "        $protein_name\n";

			$count++;
		}

		$control++;
	}

	print "\n\nNumber of sequences that passed the length requirement: $count\n\n";

	close FASTA;

	print "\nDo ClustalW alignment (fasta.txt) .......\n\n";

	# Run clustalx and wait for it; its result is read from fasta.aln.
	`/home/habegger/COVAR/clustalx1.83.sgi/clustalx`;

	$clustalw_file = `cat $dir_name/fasta.aln`;
	@clustalw_lines_temp = split(/\n/,$clustalw_file);

	# drop the first three lines and the last line of the alignment file
	shift @clustalw_lines_temp;
	shift @clustalw_lines_temp;
	shift @clustalw_lines_temp;
	pop @clustalw_lines_temp;

	$k = 0;
	$index = 0;
	$space_lines = 2;

	# Keep $count consecutive lines, skip the next $space_lines lines, and
	# repeat: this collects the sequence rows of every alignment block.
	foreach(@clustalw_lines_temp)
	{
		if($k < $count)
		{
			$spacer = 0;
			$clustalw_lines[$index] = $_;
			$index++;
			$k++;
		}
		else
		{
			$spacer++;
			if ($spacer == $space_lines)
				{$k = 0;}
		}
	}

	@clustalw_sequences = ();

	$k = 0;
	$index = 0;
	$temp_concat = "";

	# Row i of every block belongs to sequence i; ids come from block one.
	foreach(@clustalw_lines)
	{
		@seq_temp = split(/\s+/,$_);
		$temp_concat = "$clustalw_sequences[$k % $count]$seq_temp[1]";
		$clustalw_sequences[$k % $count] = $temp_concat;
		if($k < $count)
			{$seq_id[$k % $count] = $seq_temp[0];}
		$k++;
	}

	# Debug listings; they are best-effort, so a failed open is ignored.
	open(RIT, '>', "/home/habegger/COVAR/test.id");

	open(MATCH, '>', "/home/habegger/COVAR/match.txt");

	# Replace each alignment id by " [accession]" (padded to 11 characters
	# before the leading blank) followed by the protein name.  The id is
	# used as a regular expression against the accession.
	foreach(@seq_id)
	{
		print RIT "$_\n";

		for($k = 0;$k < $count; $k++)
		{
			if($accession_OK[$k] =~ /$_/)
			{
				print MATCH "$accession_OK[$k]   ==  $_\n";

				$accession_OK[$k] = "[$accession_OK[$k]]";
				$length = length($accession_OK[$k]);
				for ($c = $length ; $c < 11; $c++)
					{$accession_OK[$k] = "$accession_OK[$k] ";}
				$accession_OK[$k] = " $accession_OK[$k]";

				$_ = "$accession_OK[$k]$protein_names[$k]";
			}
		}
	}
	close RIT;
	close MATCH;

	$overall_seq_length = length($clustalw_sequences[0]);

	# One variable for the output path, so the message below always names
	# the file that was really written.
	my $workfile_path = "/home/connare/RIBOSOMAL_DIR/HISTONES.dir/blast.workfile";

	if (open(TEST, '>', $workfile_path))
	{
		for($k = 0;$k < $count; $k++)
		{
			print TEST ">$seq_id[$k]\n$clustalw_sequences[$k]\n";
		}

		print TEST "\n\n";

		for($k = 1;$k <= $overall_seq_length; $k++)
		{
			print TEST "&& $k\n"
		}

		print TEST "\n\n";
		print TEST "<< $count";

		close TEST;

		print "\nGenerating new workfile: $workfile_path\n\n";
	}
	else
	{
		print "\n*** Cannot write workfile $workfile_path: $! ***\n\n";
	}

	&end_menu_function(2);
}
