#!/usr/bin/perl


# Does as the program title suggests. This particular program inputs the translation file from Sanjay's codon analysis program
# and then outputs a FASTA file.
#
#
#	get_pdb_from_hssp.pl - This particular program inputs the translation file from sanjays codon analysis program
#					then outputs a fasta file.
#    Copyright (C) 2006  bobby Huether
#	email: bhuether@hwi.buffalo.edu
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License along
#    with this program; if not, write to the Free Software Foundation, Inc.,
#    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

use LWP::Simple;
use FileHandle;
STDOUT->autoflush(1);
use POSIX qw(ceil floor);
require HTML::TreeBuilder;
require HTML::FormatText;

# Show the licence banner first, then run the lookup itself.
intro();
main();

# Print the program banner (name, copyright, contact address and GPL notice)
# to STDOUT. Takes no arguments.
sub intro
{
    # A non-interpolating here-document is used on purpose: inside a
    # double-quoted string "@hwi" is read as an array, so the e-mail address
    # used to be printed as "bhuether.buffalo.edu".
    print <<'END_BANNER';

	get_pdb_from_hssp.pl - This particular program inputs the translation file from sanjays codon analysis program
					then outputs a fasta file.
    Copyright (C) 2006  bobby Huether
	email: bhuether@hwi.buffalo.edu

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
END_BANNER

    # The banner has always been followed by two blank lines.
    print "\n\n";
}

# Look up every HSSP identifier listed in test.txt (whitespace separated) on
# the EBI SRS server and write one tab-separated row per identifier to
# pdb.output2:
#
#   identifier, aligned sequences per chain, number of alignment rows that
#   carry a structure ID, and those IDs with their identity value, wrapped
#   after every seventh hit.
#
# A running counter is printed to STDOUT as progress. Dies if test.txt cannot
# be read or pdb.output2 cannot be written.
#
# NOTE(review): the SRS service behind this URL may no longer be reachable -
# confirm before relying on the script.
sub main
{
    # Scoped to this sub only, so the rest of the file is not affected.
    use strict;
    use warnings;

    my $id_list_file = 'test.txt';
    my $report_file  = 'pdb.output2';

    # Read the identifiers directly instead of shelling out to `cat`.
    open(my $ids_fh, '<', $id_list_file)
        or die "Cannot read $id_list_file: $!\n";
    my $list = do { local $/; <$ids_fh> };
    close($ids_fh);
    $list = '' unless defined $list;

    # split ' ' ignores leading whitespace, so a file that starts with a blank
    # line no longer produces an empty first identifier.
    my @accno = split(' ', $list);

    open(my $report, '>', $report_file)
        or die "Cannot write $report_file: $!\n";
    print {$report} "PDB\t#HSSP\t#PDB\tPDB (iden)\n";

    my $formatter  = HTML::FormatText->new(leftmargin => 0, rightmargin => 100);
    my $proteinnum = 0;

    foreach my $id (@accno) {
        $proteinnum++;
        print "$proteinnum.";

        my $url  = "http://srs.ebi.ac.uk/srsbin/cgi-bin/wgetz?-e+[HSSP-ID:'" . $id . "']";
        my $page = get($url);

        # get() returns undef when the request fails. Such entries were
        # skipped silently before; now the reason is reported.
        if (!$page) {
            warn "\n*** No data retrieved for $id ***\n";
            next;
        }

        my $text = $page;
        if ($page =~ /no entries found/) {
            # As before, the entry still gets a (mostly empty) report row.
            print "\n*** Invalid PDB ID ***\n\n";
        }
        else {
            # Parse in memory instead of round-tripping through a file.
            my $tree = HTML::TreeBuilder->new;
            $tree->parse($page);
            $tree->eof;
            $text = $formatter->format($tree);
            # Free the tree explicitly so it does not pile up over the loop.
            $tree->delete;
        }
        next unless $text;

        # The text of the most recent entry is still left in temp_file, as in
        # earlier versions; failure to write it is not fatal.
        if (open(my $dump, '>', 'temp_file')) {
            print {$dump} $text;
            close($dump);
        }

        my ($prot, @hits) = _parse_hssp_text($text);
        print {$report} "$id\t$prot\t", scalar(@hits), "\t";

        my $shown = 0;
        foreach my $hit (@hits) {
            my ($pdb_id, $identity) = @{$hit};
            $shown++;
            print {$report} "- $pdb_id ($identity)  ";
            # Wrap after every seventh hit. The old test used regex matches
            # ($count =~ 7), which also fired on 17, 27, 70-79, ...
            print {$report} "\n\t\t\t" if $shown % 7 == 0;
        }
        print {$report} "\n";
    }

    close($report) or die "Cannot finish writing $report_file: $!\n";
}

# Pull the figures for one report row out of the plain text of an HSSP entry.
#
# Argument: the entry text.
# Returns a list whose first element is NALIGN divided by the chain count
# ('' when no chain count of at least 1 is found), followed by one
# [structure_id, identity] array reference per alignment row that has a word
# character in column 20.
sub _parse_hssp_text
{
    use strict;
    use warnings;

    my ($text) = @_;

    # KCHAIN is preferred; NCHAIN is the fallback when KCHAIN has no number.
    my ($chains) = $text =~ /KCHAIN\s*(\d+)/;
    ($chains) = $text =~ /NCHAIN\s*(\d+)/ unless defined $chains;

    my $nalign = ($text =~ /NALIGN\s+(\d+)/) ? $1 : 0;

    # Computed fresh for every entry: the value used to live in a global and
    # leaked from the previous entry when no chain count was found.
    my $prot = '';
    $prot = $nalign / $chains if defined $chains && $chains >= 1;

    # The rows of interest sit between "PROTEINS :" and "## ALIGNMENTS".
    my @rows;
    if ($text =~ /PROTEINS :(.*?)(?:\#\# ALIGNMENTS|PROTEINS :|\z)/s) {
        @rows = split(/\n/, $1);
        # Skip the first two lines (the rest of the title line and,
        # presumably, the column header), exactly as before.
        shift @rows;
        shift @rows;
    }

    my @hits;
    foreach my $row (@rows) {
        # Columns 20-23 hold the structure ID, columns 28-31 the identity.
        next unless length($row) > 20 && substr($row, 20, 1) =~ /\w/;
        my $pdb_id   = substr($row, 20, 4);
        my $identity = length($row) > 28 ? substr($row, 28, 4) : '';
        push @hits, [ $pdb_id, $identity ];
    }

    return ($prot, @hits);
}

