2012/05/31

statistics tips: my covariance matrix implementation on python

#!/usr/bin/env python                                                                                                                
#coding: utf-8                                                                                                                       
                                                                                                                                     
import numpy as np                                                                                                                   
from numpy.random import *                                                                                                           
                                                                                                                                     
def get_covariance_matrix(A):                                                                                                        
    N, M = A.shape                                                                                                                   
    ret = np.reshape(np.zeros(N*M), (N,M))                                                                                           
                                                                                                                                     
    for n in range(0, N):                                                                                                            
        for m in range(0, M):                                                                                                        
            ret[n][m] = calc_covariance(A, n, m)                                                                                     
                                                                                                                                     
    return ret                                                                                                                       
                                                                                                                                     
def calc_covariance(A, n, m):                                                                                                        
    nominator = np.sum((A[n] - np.average(A[n])) * (A[m] - np.average(A[m])))                                                        
    denominator = A.shape[0] - 1                                                                                                     
    return nominator/denominator                                                                                                     
                                                                                                                                     
def test():                                                                                                                          
    D = 4                                                                                                                            
    A = np.reshape([randint(100) for i in range(D**2)], (D, D))                                                                      
                                                                                                                                     
    print("# numpy.cov() output:")                                                                                                   
    print(np.cov(A))                                                                                                                 
                                                                                                                                     
    print("# my covariance matrix:")                                                                                                 
    print(get_covariance_matrix(A))                                                                                                  
                                                                                                                                     
    print("# my precision matrix:")                                                                                                  
    print(get_covariance_matrix(A) ** -1)  
                                                                                                                                     
if __name__ == "__main__":                                                                                                           
    test()                             
yaboo@overtones:~$ python kyoubunsan.py
# numpy.cov() output:
[[  226.91666667   -26.83333333   -39.33333333   499.75      ]
 [  -26.83333333   328.33333333   629.          -257.83333333]
 [  -39.33333333   629.          1432.66666667  -882.        ]
 [  499.75        -257.83333333  -882.          1976.25      ]]                                                                      
# my covariance matrix:
[[  226.91666667   -26.83333333   -39.33333333   499.75      ]
 [  -26.83333333   328.33333333   629.          -257.83333333]
 [  -39.33333333   629.          1432.66666667  -882.        ]
 [  499.75        -257.83333333  -882.          1976.25      ]]
# my precision matrix:                                                                                                               
[[ 0.0044069  -0.03726708 -0.02542373  0.002001  ]
 [-0.03726708  0.00304569  0.00158983 -0.00387847]
 [-0.02542373  0.00158983  0.000698   -0.00113379]
 [ 0.002001   -0.00387847 -0.00113379  0.00050601]]

5 comments:

  1. Do I get it right - it's for 4-D? So for example could be used to compare points in timespace described as [x, y, z, time]?

    ReplyDelete
  2. Tried to run it on my own matrix. Came up with the following error:
    File "/home/jakob/Dropbox/KU/DbDm/Assignments/3/1.py", line 127, in calc_covariance
    nominator = np.sum((A[n] - np.average(A[n])) * (A[m] - np.average(A[m])))
    ValueError: shapes (1,21) and (1,21) not aligned: 21 (dim 1) != 1 (dim 0).

    the matrix I gave it is 160x21, so I really don't know, why it all of a sudden is wrongly shaped throughout the calculations.

    ReplyDelete
    Replies
    1. nvm. Found a little problem in the calculations. in line 18, you should transpose (A[m] - np.average(A[m])), right? This can simply be done through numpy by adding '.T', so that it looks like:

      Nominator = np.sum((A[n] - np.average(A[n])) * (A[m] - np.average(A[m])).T)

      And then it worked flawlessly.

      Delete
    2. The transpose of a 1D array is still a 1D array!

      Delete
  3. denominator = A.shape[0] - 1

    or

    denominator = A.shape[1] - 1

    ReplyDelete

100