#!/acrm/usr/local/bin/perl
#*************************************************************************
#
#   Program:    xml2flat
#   File:
#
#   Version:    V1.0
#   Date:       15.12.05
#   Function:   Dump the PDBSPROTEC XML file in flat file format
#
#   Copyright:  (c) UCL / Dr. Andrew C. R. Martin 2005
#   Author:     Dr. Andrew C. R. Martin
#   Address:    Biomolecular Structure & Modelling Unit,
#               Department of Biochemistry & Molecular Biology,
#               University College,
#               Gower Street,
#               London.
#               WC1E 6BT.
#   Phone:      +44 (0)207 679 7034
#   EMail:      andrew@bioinf.org.uk
#               martin@biochem.ucl.ac.uk
#   Web:        http://www.bioinf.org.uk/
#
#
#*************************************************************************
#
#   This program is not in the public domain, but it may be copied
#   according to the conditions laid out in the accompanying file
#   COPYING.DOC
#
#   The code may be modified as required, but any modifications must be
#   documented so that the person responsible can be identified. If
#   someone else breaks this code, I don't want to be blamed for code
#   that does not work!
#
#   The code may not be sold commercially or included as part of a
#   commercial product except as described in the file COPYING.DOC.
#
#*************************************************************************
#
#   Description:
#   ============
#   This is really just a little demo program to allow you to convert
#   the XML format to the flat file format. It shows you how to use
#   Perl/DOM to parse the data. Warning, since it's DOM, everything
#   gets loaded into memory, so 1Gig of RAM is a good idea...
#
#*************************************************************************
#
#   Usage:
#   ======
#   xml2flat file.xml
#
#   The XML file is named by the first command line argument; the flat
#   table is written to standard output, one row per <ec> element.
#
#*************************************************************************
#
#   Revision History:
#   =================
#   NOTE(review): record later modifications here (who, when, what), as
#   the conditions above require.
#
#*************************************************************************
use strict;
use warnings;
use XML::DOM;

# Layout of one data row. The field widths mirror the header and ruler
# printed below so that the columns line up.
my $ROW_FORMAT = " %-7s | %-7s | %-4s | %-4s | %-6s | %s\n";

# Get the filename from the command line; without one there is nothing
# to parse, so stop with a usage message instead of handing undef to the
# parser.
my $file = shift @ARGV;
die "Usage: xml2flat file.xml\n" unless defined $file;

# Load the complete document into memory as a DOM tree.
my $parser = XML::DOM::Parser->new();
my $doc    = $parser->parsefile($file);

print " pdbcode | chainid | res1 | res2 | sprot  | ec\n";
print "---------+---------+------+------+--------+------------\n";

# Walk pdb_sprot_ec -> chain -> region -> ec, printing one row for each
# <ec> element. A region without any <ec> element produces no row.
my $rowcount = 0;
for my $pdbse_tag ($doc->getElementsByTagName('pdb_sprot_ec')) {
    my $pdb = $pdbse_tag->getAttribute('pdb');

    for my $chain_tag ($pdbse_tag->getElementsByTagName('chain')) {
        my $chain = $chain_tag->getAttribute('id');

        for my $region_tag ($chain_tag->getElementsByTagName('region')) {
            my $res1  = $region_tag->getAttribute('res1');
            my $res2  = $region_tag->getAttribute('res2');
            my $sprot = $region_tag->getAttribute('sprot');

            for my $ec_tag ($region_tag->getElementsByTagName('ec')) {
                # Join the four attributes into one dotted EC string,
                # "ec1.ec2.ec3.ec4".
                my $ec = join '.',
                         map { $ec_tag->getAttribute($_) } qw(ec1 ec2 ec3 ec4);

                printf($ROW_FORMAT, $pdb, $chain, $res1, $res2, $sprot, $ec);
                $rowcount++;
            }
        }
    }
}

print "($rowcount rows)\n\n";

# XML::DOM trees contain circular references; dispose() breaks them so
# the memory can be reclaimed.
$doc->dispose;