Bioplib
Protein Structure C Library
 All Data Structures Files Functions Variables Typedefs Macros Pages
regression.c
Go to the documentation of this file.
1 /************************************************************************/
2 /**
3 
4  \file regression.c
5 
6  \version V1.0
7  \date 08.10.14
8  \brief Routines for finding the best fit line through a set of
9  points
10 
11  \copyright (c) Dr. Andrew C. R. Martin, UCL, 2014
12  \author Dr. Andrew C. R. Martin
13  \par
14  Institute of Structural & Molecular Biology,
15  University College London,
16  Gower Street,
17  London.
18  WC1E 6BT.
19  \par
20  andrew@bioinf.org.uk
21  andrew.martin@ucl.ac.uk
22 
23 **************************************************************************
24 
25  This program is not in the public domain, but it may be copied
26  according to the conditions laid out in the accompanying file
27  COPYING.DOC
28 
29  The code may not be sold commercially or included as part of a
30  commercial product except as described in the file COPYING.DOC.
31 
32 **************************************************************************
33 
34  Description:
35  ============
36 
37 **************************************************************************
38 
39  Usage:
40  ======
41 
42 **************************************************************************
43 
44  Revision History:
45  =================
46 - V1.0 08.10.14 Original By: ACRM Based on code by Abhi Raghavan
47 
48 *************************************************************************/
49 /* Doxygen
50  -------
51  #GROUP Maths
52  #SUBGROUP Matrices
53  #FUNCTION blCalculateCovarianceMatrix()
54  Find the covariance matrix for a given matrix
55 
56  #GROUP Maths
57  #SUBGROUP Geometry
58  #FUNCTION blCalculateBestFitLine()
59  Calculates a best fit line through a set of coordinates in 3D.
60  Results are returned as a vector and a point through which the vector
61  must pass.
62 
63  #FUNCTION blFindCentroid()
64  Calculates the centroid of an array of points. (Like blGetCofGPDB()
65  but for a coordinate array instead of a PDB linked list)
66 */
67 
68 /************************************************************************/
69 /* Includes
70 */
71 #include <math.h>
72 #include <stdlib.h>
73 #include "pdb.h"
74 #include "array.h"
75 #include "macros.h"
76 #include "MathType.h"
77 #include "SysDefs.h"
78 #include "regression.h"
79 #include "eigen.h"
80 
81 /************************************************************************/
82 /* Defines and macros
83 */
84 
85 /************************************************************************/
86 /* Globals
87 */
88 
89 /************************************************************************/
90 /* Prototypes
91 */
92 
93 /************************************************************************/
94 /*>BOOL blCalculateBestFitLine(REAL **coordinates, int numberOfPoints,
95  int numberOfDimensions, REAL *centroid,
96  REAL *eigenVector)
97  -----------------------------------------------------------------
98 *//**
99  \param[in] coordinates 2D Array containing the coordinates
100  \param[in] numberOfPoints The number of points in the array
101  \param[in] numberOfDimensions The number of dimensions (usually 3)
102  \param[out] centroid The centroid of the points
103  \param[out] eigenVector The best Eigen vector
104  \return Success?
105 
106  Calculates a best fit line through a set of coordinates in 3D.
107  The results are returned in 'eigenVector'
108 
109  The code calculates the centroid and ouputs this. The covariance
110  matrix is then calculated and the Eigen vectors and values of the
111  covariance matrix are then calculated. The Eigenvalue with the largest
112  Eigenvector represents the best fit line passing through the centroid.
113 
114 - 07.10.14 Original By: ACRM Based on code by Abhi Raghavan
115 */
116 BOOL blCalculateBestFitLine(REAL **coordinates, int numberOfPoints,
117  int numberOfDimensions, REAL *centroid,
118  REAL *eigenVector)
119 {
120  REAL *eigenValues = NULL,
121  **covarianceMatrix = NULL,
122  **eigenVectorMatrix = NULL;
123  BOOL retValue = TRUE;
124  int largestEigenValueIndex = 0,
125  i = 0;
126 
127  /* Allocate memory */
128  eigenValues = (REAL *)malloc(numberOfDimensions * sizeof(REAL));
129  covarianceMatrix = (REAL **)blArray2D(sizeof(REAL),
130  numberOfDimensions,
131  numberOfDimensions);
132  eigenVectorMatrix = (REAL **)blArray2D(sizeof(REAL),
133  numberOfDimensions,
134  numberOfDimensions);
135 
136  if((eigenValues == NULL) || (eigenVectorMatrix == NULL) ||
137  (covarianceMatrix == NULL))
138  {
139  retValue = FALSE;
140  }
141  else
142  {
143  /* Find the centroid of the coordinates */
144  blFindCentroid(coordinates, numberOfPoints, 3, centroid);
145 
146  /* Calculate the covariance matrix */
147  if(!blCalculateCovarianceMatrix(coordinates, numberOfPoints,
148  numberOfDimensions,
149  covarianceMatrix))
150  {
151  retValue = FALSE;
152  }
153  else
154  {
155  REAL largestEigenValue;
156 
157  /* Find the eigen values and vectors. */
158  if(blEigen(covarianceMatrix, eigenVectorMatrix, eigenValues,
159  numberOfDimensions) < 0)
160  {
161  retValue = FALSE;
162  }
163  else
164  {
165  /* Find the eigen vector corresponding to the largest Eigen
166  value.
167  */
168  largestEigenValue = eigenValues[0];
169  largestEigenValueIndex = 0;
170  for(i=1; i<numberOfDimensions; i++)
171  {
172  if(eigenValues[i] > largestEigenValue)
173  {
174  largestEigenValue = eigenValues[i];
175  largestEigenValueIndex = i;
176  }
177  }
178 
179  for(i=0;i<numberOfDimensions;i++)
180  {
181  eigenVector[i] =
182  eigenVectorMatrix[i][largestEigenValueIndex];
183  }
184  }
185  }
186  }
187 
188  /* Free allocated memory */
189  if(eigenValues) free(eigenValues);
190  if(eigenVectorMatrix) blFreeArray2D((char **)eigenVectorMatrix,
191  numberOfDimensions,
192  numberOfDimensions);
193  if(covarianceMatrix) blFreeArray2D((char **)covarianceMatrix,
194  numberOfDimensions,
195  numberOfDimensions);
196 
197  return(retValue);
198 }
199 
200 
201 /************************************************************************/
202 /*>void blFindCentroid(REAL **matrix, int numX, int numY,
203  REAL *centroid)
204  ------------------------------------------------------
205 *//**
206  \param[in] matrix Coordinate array
207  \param[in] numX The x-dimension of the array
208  \param[in] numY The y-dimension of the array
209  \param[out] centroid The centroid of the points
210 
211  Calculates the centroid of an array of points
212 
213 - 07.10.14 Original By: ACRM
214 */
215 void blFindCentroid(REAL **matrix, int numX, int numY,
216  REAL *centroid)
217 {
218  int point,
219  dim;
220 
221  for(dim=0; dim<numY; dim++)
222  centroid[dim] = (REAL)0.0;
223 
224  for(point=0; point<numX; point++)
225  {
226  for(dim=0; dim<3; dim++)
227  {
228  centroid[dim] += matrix[point][dim];
229  }
230  }
231 
232  for(dim=0; dim<3; dim++)
233  {
234  centroid[dim] /= numX;
235  }
236 }
237 
238 
239 /************************************************************************/
240 /*>BOOL blCalculateCovarianceMatrix(REAL **matrix, int numX, int numY,
241  REAL **cov)
242  -----------------------------------------------------------------
243 *//**
244  \param[in] matrix The matrix
245  \param[in] numX x-dimension of the matrix
246  \param[in] numY y-dimension of the matrix
247  \param[out] cov The covariance matrix
248  \return Success
249 
250  Find the covariance matrix for a given matrix
251 
252 - 07.10.14 Original By: ACRM (based on code by Abhi Raghavan)
253 */
254 BOOL blCalculateCovarianceMatrix(REAL **matrix, int numX, int numY,
255  REAL **cov)
256 {
257  int i = 0,
258  j = 0,
259  k = 0;
260  REAL total = 0.0,
261  *centroid;
262 
263  if((centroid=(REAL *)malloc(numY * sizeof(REAL)))==NULL)
264  return(FALSE);
265 
266  /* Calculate the centroid of the matrix */
267  blFindCentroid(matrix, numX, numY, centroid);
268 
269  /* Calculate the covariance matrix */
270  for(i=0; i<numY; i++)
271  {
272  for(j=i; j<numY; j++)
273  {
274  total=0.0;
275 
276  for(k=0; k<numX; k++)
277  {
278  total += ( (matrix[k][i] - centroid[i]) *
279  (matrix[k][j] - centroid[j]) );
280  }
281 
282  /* cov[i][j] = total/(REAL)numX; */
283  /* cov[j][i] = total/(REAL)numX; */
284 
285  cov[i][j] = total;
286  cov[j][i] = total;
287  }
288  }
289 
290  /* Free memory and return */
291  free(centroid);
292 
293  return(TRUE);
294 }
295 
296 
BOOL blCalculateCovarianceMatrix(REAL **matrix, int numX, int numY, REAL **cov)
Definition: regression.c:254
Include file for PDB routines.
short BOOL
Definition: SysDefs.h:64
#define NULL
Definition: array2.c:99
char ** blArray2D(int size, int dim1, int dim2)
Definition: array2.c:130
#define FALSE
Definition: macros.h:223
void blFindCentroid(REAL **matrix, int numX, int numY, REAL *centroid)
Definition: regression.c:215
Useful macros.
Include file for 2D/3D array functions.
int blEigen(REAL **matrix, REAL **eigenVectors, REAL *eigenValues, int matrixSize)
Definition: eigen.c:115
double REAL
Definition: MathType.h:67
void blFreeArray2D(char **array, int dim1, int dim2)
Definition: array2.c:174
#define TRUE
Definition: macros.h:219
System-type variable type definitions.
BOOL blCalculateBestFitLine(REAL **coordinates, int numberOfPoints, int numberOfDimensions, REAL *centroid, REAL *eigenVector)
Definition: regression.c:116
Type definitions for maths.