#!/usr/local/bin/perl
#*************************************************************************
#
#   Program:    makedomains
#   File:       makedomains.perl
#   
#   Version:    V1.1
#   Date:       11.10.99
#   Function:   Create split domain files from the CATH domain list
#   
#   Copyright:  (c) Reading University / Dr. Andrew C. R. Martin 1999
#   Author:     Dr. Andrew C. R. Martin
#   EMail:      a.c.r.martin@rdg.ac.uk
#               andrew@bioinf.org.uk
#               
#*************************************************************************
#
#   This program is not in the public domain, but it may be copied
#   freely providing this notice is retained.
#
#   The code may be modified as required, but any modifications must be
#   documented so that the person responsible can be identified. If 
#   someone else breaks this code, I don't want to be blamed for code 
#   that does not work! 
#
#   The code may not be sold commercially or included as part of a 
#   commercial product.
#
#*************************************************************************
#
#   Description:
#   ============
#   Reads the CATH domain definition file and spews out the individual
#   domain files from the PDB
#
#*************************************************************************
#
#   Usage:
#   ======
#   makedomains.perl domlist &> makedomains.out
#
#*************************************************************************
#
#   Revision History:
#   =================
#   V1.1  11.10.99 Deletes the output file if the input PDB file didn't
#                  exist (i.e. was obsoleted)
#
#*************************************************************************
# Customise these for your site. The following assumes that your split
# domain files will live in /data/dompdb/ and that your PDB files are
# called /data/pdb/pdbXXXX.ent (where XXXX is the PDB code in lower case)
#
# A trailing '/' on $outdir is not needed: the script inserts the
# separator itself when it builds each output filename, so including one
# here only produces paths containing '//'.
#
$outdir = "/data/dompdb";                 # Output directory for domain files
                                          # (no trailing '/')
$pdbprep = "/data/pdb/pdb";               # String to put on the front of a
                                          # PDB code to get a filename
$pdbext = ".ent";                         # String to put on the end of a PDB
                                          # code to get a filename
#*************************************************************************

$| = 1;                                   # Turn on flushing so progress output
                                          # is written as soon as it is printed

# Each record of the domains file is split on whitespace and read as:
#    field 0   PDB code (first 4 characters) followed by the chain label
#    field 1   number of domains, after a one-character prefix
#    field 2   number of fragments, after a one-character prefix (not used)
#    then, for each domain, the number of segments followed by six fields
#    per segment: chain label, residue number and insert code for the
#    start of the segment, then the same three for its end. A chain label
#    of "0" or an insert code of "-" means there is none.
#
# Modified from V1.1: blank lines, '#' comment lines and short/truncated
# records are skipped (with a warning for the malformed ones) rather than
# being turned into getpdb commands, and getpdb failures are reported.

LINE:
while(<>)                                 # Loop over the domains file
{
    chomp;

    # Blank lines and '#' comment lines hold no domain data
    next LINE if(/^\s*(?:#|$)/);

    my @fields = split;
    if(@fields < 3)
    {
        warn "Line $.: too few fields, skipping: $_\n";
        next LINE;
    }

    # Obtain the basic identifying data
    my $pdb      = substr($fields[0],0,4);
    my $chain    = substr($fields[0],4,1);
    my $ndomains = substr($fields[1],1) * 1;   # Drop the prefix character
    my $pdbfile  = $pdbprep . $pdb . $pdbext;  # Input PDB file for this entry

    # Start looking at the domains
    my $base = 3;                         # Index of the next unread field
    # For each domain
    for(my $i=0; $i<$ndomains; $i++)
    {
        my $nseg    = $fields[$base++];
        my $domnum  = $i+1;
        my $domid   = $pdb . $chain . $domnum;
        my $outfile = "$outdir/$domid";

        print "Processing $domid\n";

        unless(defined($nseg) && $nseg =~ /^\d+$/)
        {
            warn "Line $.: bad segment count for $domid, skipping rest of line\n";
            next LINE;
        }

        # For each segment of this domain, print the identifier and
        # residue range
        for(my $j=0; $j<$nseg; $j++)
        {
            # A segment needs six more fields; without them the getpdb
            # arguments would be built from undefined values. Output
            # already written for earlier segments is left in place.
            if($base + 5 > $#fields)
            {
                warn "Line $.: truncated segment data for $domid, skipping rest of line\n";
                next LINE;
            }

            # The start and the end of the segment are laid out identically
            # (chain label, residue number, insert code), so read both the
            # same way: first pass gives the start, second pass the end
            my @range = ();
            foreach my $end (0, 1)
            {
                my($label, $resnum, $insert) = @fields[$base .. $base+2];
                $base += 3;

                my $spec = ($label eq "0") ? "" : $label . ".";  # Chain label
                $spec .= $resnum;                                # Residue number
                $spec .= $insert if($insert ne "-");             # Insert code
                push @range, $spec;
            }
            my($start, $stop) = @range;

            print "\t$start\t$stop\n";

            # The first segment creates (or overwrites) the output file;
            # later segments are appended to it
            my $redirect = ($j==0) ? ">" : ">>";
            my $status = system("getpdb $start $stop $pdbfile $redirect $outfile");

            # A missing input file is the expected "obsolete entry" case
            # and is dealt with below, so only report other failures
            if($status != 0 && -e $pdbfile)
            {
                warn "getpdb failed (status $status) for $domid segment $start to $stop\n";
            }

            DeleteIfObsolete($pdbfile, $outfile);
        }
    }
}

# DeleteIfObsolete($infile, $outfile)
# -----------------------------------
# Removes $outfile when $infile does not exist; otherwise leaves it alone.
# Called after each attempt to write a domain file so that no output is
# left behind for a PDB file that is absent.
sub DeleteIfObsolete
{
    my $infile  = shift;
    my $outfile = shift;

    # Short-circuit: the unlink only runs when the input file is absent
    !(-e $infile) and unlink($outfile);
}
