Bioplib
Protein Structure C Library
 All Data Structures Files Functions Variables Typedefs Macros Pages
ExtractZonePDB.c
Go to the documentation of this file.
1 /************************************************************************/
2 /**
3 
4  \file ExtractZonePDB.c
5 
6  \version V1.19
7  \date 17.02.16
8  \brief PDB linked list manipulation
9 
10  \copyright (c) UCL / Dr. Andrew C. R. Martin 1992-2016
11  \author Dr. Andrew C. R. Martin
12  \par
13  Institute of Structural & Molecular Biology,
14  University College London,
15  Gower Street,
16  London.
17  WC1E 6BT.
18  \par
19  andrew@bioinf.org.uk
20  andrew.martin@ucl.ac.uk
21 
22 **************************************************************************
23 
24  This code is NOT IN THE PUBLIC DOMAIN, but it may be copied
25  according to the conditions laid out in the accompanying file
26  COPYING.DOC.
27 
28  The code may be modified as required, but any modifications must be
29  documented so that the person responsible can be identified.
30 
31  The code may not be sold commercially or included as part of a
32  commercial product except as described in the file COPYING.DOC.
33 
34 **************************************************************************
35 
36  Description:
37  ============
38 
39 
40 **************************************************************************
41 
42  Usage:
43  ======
44 
45 **************************************************************************
46 
47  Revision History:
48  =================
49 - V1.0 22.02.94 Original release
50 - V1.1 23.05.94 Added FindNextChainPDB()
51 - V1.2 05.10.94 KillSidechain() uses BOOL rather than int
52 - V1.3 24.07.95 Added TermPDB()
53 - V1.4 25.07.95 Added GetPDBChainLabels()
54 - V1.5 26.09.95 Fixed bug in TermPDB()
55 - V1.6 12.10.95 Added DupePDB(), CopyPDBCoords()
56 - V1.7 23.10.95 Moved FindResidueSpec() to ParseRes.c
57 - V1.8 10.01.96 Added ExtractZonePDB()
58 - V1.9 14.03.96 Added FindAtomInRes()
59 - V1.10 08.10.99 Initialised some variables
60 - V1.11 22.03.05 Extracted range is limited by specified residues
61 - V1.12 22.03.06 Modified ExtractZonePDB() to allow non-exact ranges
62 - V1.13 29.10.10 Fixed bug when end of zone was last residue in a chain
63 - V1.14 04.02.14 Use CHAINMATCH By: CTP
64 - V1.15 07.07.14 Use bl prefix for functions By: CTP
65 - V1.16 19.08.14 Renamed function to blExtractZonePDBAsCopy() By: CTP
66 - V1.17 07.10.15 Added function blExtractNotZonePDBAsCopy() and
67  blExtractNotZoneSpecPDBAsCopy() By: ACRM
68 - V1.18 08.12.15 Modified blExtractZonePDBAsCopy() to check for exact
69  match first
70 
71 *************************************************************************/
72 /* Doxygen
73  -------
74  #GROUP Handling PDB Data
75  #SUBGROUP Searching the PDB linked list
76  #FUNCTION blExtractZonePDBAsCopy()
77  Reduces a PDB linked list to those residues within a specified zone
78  forming a new linked list. Uses separate chain, residue number and
79  insert rather than residue specifications.
80 
81  #FUNCTION blExtractZoneSpecPDBAsCopy()
82  Reduces a PDB linked list to those residues within a specified zone
83  forming a new linked list. Uses residue specifications ([c]nnn[i])
84  rather than separate chain, residue number and insert.
85 
86  #FUNCTION blExtractNotZonePDBAsCopy()
87  Reduces a PDB linked list to those residues outside a specified zone
88  forming a new linked list. Uses separate chain, residue number and
89  insert rather than residue specifications.
90 
91  #FUNCTION blExtractNotZoneSpecPDBAsCopy()
92  Reduces a PDB linked list to those residues outside a specified zone
93  forming a new linked list. Uses residue specifications ([c]nnn[i])
94  rather than separate chain, residue number and insert.
95 */
96 /************************************************************************/
97 /* Includes
98 */
99 #include <math.h>
100 #include <stdlib.h>
101 
102 #include "MathType.h"
103 #include "SysDefs.h"
104 #include "pdb.h"
105 #include "macros.h"
106 #include "general.h"
107 
108 /************************************************************************/
109 /* Defines and macros
110 */
111 
112 /************************************************************************/
113 /* Globals
114 */
115 
116 /************************************************************************/
117 /* Prototypes
118 */
119 
120 /************************************************************************/
121 /*>PDB *blExtractZonePDBAsCopy(PDB *inpdb, char *chain1, int resnum1,
122  char *insert1, char *chain2, int resnum2,
123  char *insert2)
124  -----------------------------------------------------------------------
125 *//**
126 
127  \param[in] *inpdb Input PDB linked list
128  \param[in] *chain1 Start residue chain name
129  \param[in] resnum1 Start residue number
130  \param[in] *insert1 Start residue insert code
131  \param[in] *chain2 End residue chain name
132  \param[in] resnum2 End residue number
133  \param[in] *insert2 End residue insert code
134  \return PDB linked list of the region of interest.
135 
136  Reduces a PDB linked list to those residues within a specified zone.
137  Note that the PDB linked list is duplicated before extraction so
138  pointers do not match those in the input PDB linked list. Excess
139  records in the new PDB linked list are freed.
140 
141 - 10.01.96 Original By: ACRM
142 - 22.03.06 Modified to allow non-exact zones. i.e. the extracted zone
143  will be the widest subset of the specified zone. So, if
144  you specify 30-35Z and the PDB file only has 30-35B
145  then that will be extracted.
146 - 29.10.10 Fixed extraction where end of zone matched last residue in
147  a chain
148 - 04.02.14 Use CHAINMATCH By: CTP
149 - 07.07.14 Use bl prefix for functions By: CTP
150 - 19.08.14 Renamed function to blExtractZonePDBAsCopy() By: CTP
151 - 08.12.15 Now checks for an exact match first to deal with 6INS that
152  uses the insert code for something different! By: ACRM
153 - 17.02.16 Added fix on check for exact match - prev wasn't being set
154  correctly if the match was to the first residue in the file
155 
156 *** TODO - This doesn't deal with CONECT information properly! ***
157 */
159  char *chain1, int resnum1, char *insert1,
160  char *chain2, int resnum2, char *insert2)
161 {
162  PDB *pdb, *p,
163  *start = NULL,
164  *last = NULL,
165  *prev = NULL;
166 
167  /* Duplicate the PDB linked list */
168  if((pdb = blDupePDB(inpdb))==NULL)
169  return(NULL);
170 
171  /* Try to find the exact residue specification, setting start to the
172  start of that residue and prev to the last atom of the previous
173  residue
174  */
175  if((start=blFindResidue(pdb, chain1, resnum1, insert1))!=NULL)
176  {
177  /* Find the previous atom */
178  if(start==pdb)
179  {
180  prev=NULL;
181  }
182  else
183  {
184  for(prev=pdb; prev->next!=start; NEXT(prev)) ;
185  }
186 
187  }
188  else
189  {
190  /* We didn't find an exact match so find the residue that would
191  follow that residue specification.
192  Again prev will point to the last atom before the first atom in
193  the zone and start will point to the first atom in the zone
194  */
195  for(p=pdb; p!=NULL; NEXT(p))
196  {
197  if(CHAINMATCH(p->chain,chain1) &&
198  ((p->resnum > resnum1) ||
199  ((p->resnum == resnum1) &&
200  (p->insert[0] >= insert1[0]))))
201  {
202  start = p;
203  break;
204  }
205  prev = p;
206  }
207  }
208 
209  /* If we didn't find anything then exit */
210  if(start==NULL)
211  {
212  FREELIST(pdb, PDB);
213  return(NULL);
214  }
215 
216 
217  /* See if we have an exact match to the residue requested
218  last will be the last atom in that residue
219  */
220  if((last=blFindResidue(pdb, chain2, resnum2, insert2))!=NULL)
221  {
222  PDB *nextres = blFindNextResidue(last);
223 
224  /* Step to the end of this residue */
225  for(; last->next!=nextres; NEXT(last)) ;
226  }
227  else
228  {
229  /* There was no exact match so find the one after that residue
230  specification.
231  Again, last will point to the last atom in the zone
232 
233  29.10.10 Also breaks out if chain1 and chain2 are the same but
234  we've now come to a different chain. This fixes a bug where
235  the code wouldn't break out if resnum2 was the last residue in
236  a chain By: ACRM
237  */
238  for(p=start; p!=NULL; NEXT(p))
239  {
240  if(CHAINMATCH(p->chain,chain2) && /* If same chain and... */
241  ((p->resnum > resnum2) || /* Residue number exceeded or*/
242  ((p->resnum == resnum2) && /* Resnum same and... */
243  (p->insert[0] > insert2[0])))) /* insert exceeded */
244  {
245  break;
246  }
247  /* Both zone ends are in the same chain so, if we got here we have
248  found the right chain. If the current chain is now different
249  from chain2, then we've gone off the end of the chain
250  */
251  if( CHAINMATCH(chain1,chain2) &&
252  !CHAINMATCH(p->chain,chain2))
253  {
254  break;
255  }
256  last = p;
257  }
258  }
259 
260  if(last==NULL)
261  {
262  FREELIST(pdb, PDB);
263  return(NULL);
264  }
265 
266  /* Free linked list after 'last' */
267  if(last->next != NULL)
268  {
269  FREELIST(last->next, PDB);
270  last->next = NULL;
271  }
272 
273  /* Unlink 'start' from rest of linked list and free memory before
274  'start'
275  */
276  if(prev != NULL)
277  {
278  prev->next = NULL;
279  FREELIST(pdb, PDB);
280  }
281 
282  return(start);
283 }
284 
285 
286 /************************************************************************/
287 /*>PDB *blExtractZoneSpecPDBAsCopy(PDB *pdb, char *firstRes,
288  char *lastRes)
289  ---------------------------------------------------------
290 *//**
291  \param[in] pdb PDB linked list
292  \param[in] firstRes Residue spec ([chain]resnum[insert])
293  \param[in] lastRes Residue spec ([chain]resnum[insert])
294 
295  Extracts a zone from a PDB linked list, making a copy of the original
296  list.
297 
298 - 08.10.14 Original By: ACRM
299 */
300 PDB *blExtractZoneSpecPDBAsCopy(PDB *pdb, char *firstRes, char *lastRes)
301 {
302  char chain1[8], chain2[8],
303  insert1[8], insert2[8];
304  int resnum1, resnum2;
305  PDB *zone = NULL;
306 
307  if(blParseResSpec(firstRes, chain1, &resnum1, insert1) &&
308  blParseResSpec(lastRes, chain2, &resnum2, insert2))
309  {
310  zone = blExtractZonePDBAsCopy(pdb,
311  chain1, resnum1, insert1,
312  chain2, resnum2, insert2);
313  }
314  return(zone);
315 }
316 
317 /************************************************************************/
318 /*>PDB *blExtractNotZonePDBAsCopy(PDB *inpdb, char *chain1, int resnum1,
319  char *insert1, char *chain2, int resnum2,
320  char *insert2)
321  -----------------------------------------------------------------------
322 *//**
323 
324  \param[in] *inpdb Input PDB linked list
325  \param[in] *chain1 Start residue chain name
326  \param[in] resnum1 Start residue number
327  \param[in] *insert1 Start residue insert code
328  \param[in] *chain2 End residue chain name
329  \param[in] resnum2 End residue number
330  \param[in] *insert2 End residue insert code
331  \return PDB linked list of the atoms outside the zone.
332 
333  Reduces a PDB linked list to those residues outside a specified zone.
334  Note that the PDB linked list is duplicated before extraction so
335  pointers do not match those in the input PDB linked list. Excess
336  records in the new PDB linked list are freed.
337 
338 - 07.10.15 Original By: ACRM
339 - 08.10.15 Fixed return(FALSE) to return(NULL)
340 */
342  char *chain1, int resnum1, char *insert1,
343  char *chain2, int resnum2, char *insert2)
344 {
345  PDB *pdb = NULL,
346  *start = NULL,
347  *stop = NULL,
348  *p, *q;
349 
350  /* Find the start and stop of the zone in the old linked list */
351  if((start = blFindResidue(inpdb, chain1, resnum1, insert1)) == NULL)
352  return(NULL);
353  if((stop = blFindResidue(inpdb, chain2, resnum2, insert2)) == NULL)
354  return(NULL);
355  stop = blFindNextResidue(stop);
356 
357  for(p=inpdb; p!=start; NEXT(p))
358  {
359  if(pdb == NULL)
360  {
361  INIT(pdb, PDB);
362  q = pdb;
363  }
364  else
365  {
366  ALLOCNEXT(q, PDB);
367  }
368  if(q==NULL)
369  {
370  FREELIST(pdb, PDB);
371  return(NULL);
372  }
373 
374  blCopyPDB(q, p);
375  }
376 
377  for(p=stop; p!=NULL; NEXT(p))
378  {
379  if(pdb == NULL)
380  {
381  INIT(pdb, PDB);
382  q = pdb;
383  }
384  else
385  {
386  ALLOCNEXT(q, PDB);
387  }
388  if(q==NULL)
389  {
390  FREELIST(pdb, PDB);
391  return(NULL);
392  }
393 
394  blCopyPDB(q, p);
395  }
396 
397  blCopyConects(pdb, inpdb);
398 
399  return(pdb);
400 }
401 
402 
403 /************************************************************************/
404 /*>PDB *blExtractNotZoneSpecPDBAsCopy(PDB *pdb, char *firstRes,
405  char *lastRes)
406  ------------------------------------------------------------
407 *//**
408  \param[in] pdb PDB linked list
409  \param[in] firstRes Residue spec ([chain]resnum[insert])
410  \param[in] lastRes Residue spec ([chain]resnum[insert])
411 
412  Extracts atoms outside a zone from a PDB linked list, making a copy
413  of the original list.
414 
415 - 07.10.15 Original By: ACRM
416 */
417 PDB *blExtractNotZoneSpecPDBAsCopy(PDB *pdb, char *firstRes,
418  char *lastRes)
419 {
420  char chain1[8], chain2[8],
421  insert1[8], insert2[8];
422  int resnum1, resnum2;
423  PDB *zone = NULL;
424 
425  if(blParseResSpec(firstRes, chain1, &resnum1, insert1) &&
426  blParseResSpec(lastRes, chain2, &resnum2, insert2))
427  {
428  zone = blExtractNotZonePDBAsCopy(pdb,
429  chain1, resnum1, insert1,
430  chain2, resnum2, insert2);
431  }
432  return(zone);
433 }
434 
435 
#define ALLOCNEXT(x, y)
Definition: macros.h:251
Include file for PDB routines.
int resnum
Definition: pdb.h:310
#define NULL
Definition: array2.c:99
Definition: pdb.h:298
PDB * blExtractNotZoneSpecPDBAsCopy(PDB *pdb, char *firstRes, char *lastRes)
#define NEXT(x)
Definition: macros.h:249
PDB * blDupePDB(PDB *in)
Definition: DupePDB.c:113
Useful macros.
PDB * blExtractZonePDBAsCopy(PDB *inpdb, char *chain1, int resnum1, char *insert1, char *chain2, int resnum2, char *insert2)
PDB * blExtractZoneSpecPDBAsCopy(PDB *pdb, char *firstRes, char *lastRes)
void blCopyPDB(PDB *out, PDB *in)
Definition: CopyPDB.c:108
BOOL blCopyConects(PDB *out, PDB *in)
Definition: BuildConect.c:574
Header file for general purpose routines.
PDB * blFindResidue(PDB *pdb, char *chain, int resnum, char *insert)
Definition: FindResidue.c:117
#define CHAINMATCH(chain1, chain2)
Definition: pdb.h:495
#define FREELIST(y, z)
Definition: macros.h:264
#define INIT(x, y)
Definition: macros.h:244
System-type variable type definitions.
PDB * blFindNextResidue(PDB *pdb)
Type definitions for maths.
struct pdb_entry * next
Definition: pdb.h:307
char chain[blMAXCHAINLABEL]
Definition: pdb.h:321
BOOL blParseResSpec(char *spec, char *chain, int *resnum, char *insert)
Definition: ParseRes.c:158
PDB * blExtractNotZonePDBAsCopy(PDB *inpdb, char *chain1, int resnum1, char *insert1, char *chain2, int resnum2, char *insert2)
char insert[8]
Definition: pdb.h:320