function [mi]=MI(X,Y,k)
%% MI : Mutual Information Estimator
%       uses the k-nearest-neighbour method proposed by
%       Kraskov et al. in
%       "Estimating Mutual Information"
%       Physical Review E 69, 066138 (2004)
%
%% Usage : [MI]=MI(X,Y,k)
%
%           Estimates Mutual Information between
%           input X (Nxd) and output Y (Nx1).
%
%   Mandatory arguments : X (Nxd), Y (Nx1; a 1xN row vector is also
%                         accepted and is reshaped to a column)
%   Optional arguments  : k, number of nearest neighbours (default 6);
%                         must be a positive integer smaller than N
%
%   Returns : the estimated mutual information (scalar).
%             Columns of X with zero variance are discarded before the
%             estimation; if no column is left, 0 is returned.
%
%   Errors  : not enough arguments, Y not matching the number of rows
%             of X, or invalid k.
%%

%% Testing arguments

% Enough arguments ? (checked before X or Y is touched)
if(nargin<2)
    error('Not enough arguments, type help MI for information');
end

% Set the KNN to a 6 Nearest Neighbour by default
if(nargin<3)
    k=6;
end

% Number of samples
N=size(X,1);

% A row-vector Y would be seen by pdist as a single observation, so
% put vectors in column form first.
if(isvector(Y))
    Y=Y(:);
end

% Size of input data OK ?
if(size(Y,1)~=N)
    error('Output must have as many rows as the input data!');
end

% k-th neighbour must exist among the N-1 other samples
if(~isnumeric(k) || ~isscalar(k) || k<1 || k~=round(k) || k>=N)
    error('k must be a positive integer smaller than the number of samples!');
end


%% Establishing variables for computation

% Discard constant columns (this includes all-zero columns): their
% standard deviation is 0 and the normalization below would divide by 0.
sigma=std(X,0,1);
X=X(:,sigma>0);

% No varying input column left: nothing to estimate.
if(isempty(X))
    mi=0;
    return;
end

% Normalizing X (zero mean, unit standard deviation per column)
X=bsxfun(@rdivide, bsxfun(@minus, X, mean(X,1)), std(X,0,1));

% Compute the distances matrices for X, Y and then Z.
% pdist (default metric) is used inside each marginal space, the joint
% space Z uses the maximum of the two marginal distances.
% The diagonal is set to Inf so that a point is never its own neighbour.
XDistancesVector=pdist(X);
XDistancesMatrix=squareform(XDistancesVector)+diag(Inf(N,1));

YDistancesVector=pdist(Y);
YDistancesMatrix=squareform(YDistancesVector)+diag(Inf(N,1));

ZDistancesVector=max(XDistancesVector, YDistancesVector);
ZDistancesMatrix=squareform(ZDistancesVector)+diag(Inf(N,1));

% Sorting each row of the joint distances for KNN
ZSorted=sort(ZDistancesMatrix,2);

% Getting the distance to k Nearest Neighbour
% noted epsilon/2 in the paper
epsilon=ZSorted(:,k);

% Count, for every sample, how many points lie at distance strictly
% less than epsilon in each marginal space
nx=sum(bsxfun(@lt, XDistancesMatrix, epsilon),2);
ny=sum(bsxfun(@lt, YDistancesMatrix, epsilon),2);


%% Calculation of Mutual Information
mi=psi(k)-mean(psi(nx+1)+psi(ny+1))+psi(N);

