Home > matlab > utilities > dataset > describe_missing_data.m

describe_missing_data

PURPOSE ^

This function reads the dataset and determines the location of the missing observations (defined by NaNs)

SYNOPSIS ^

function [i,n,s,j] = describe_missing_data(data)

DESCRIPTION ^

 This function reads the dataset and determines the location of the missing observations (defined by NaNs)

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SOURCE CODE ^

0001 function [i,n,s,j] = describe_missing_data(data)
0002 % This function reads the dataset and determines the location of the missing observations (defined by NaNs)
0003 
0004 %@info:
0005 %! @deftypefn {Function File} {[@var{i}, @var{n}, @var{s}, @var{j} ] =} describe_missing_data (@var{data})
0006 %! This function reads the dataset and determines where the missing observations (NaNs) are.
0007 %!
0008 %! @strong{Inputs}
0009 %! @table @var
0010 %! @item data
0011 %! Real matrix (T-by-N) for the dataset.
0012 %! @end table
0013 %!
0014 %! @strong{Outputs}
0015 %! @table @var
0016 %! @item i
0017 %! cell array (1-by-T). Each element is a @math{p_t\times 1} column vector of indices targeting the non-NaN variables at time t ([] if none).
0018 %! @item n
0019 %! Integer scalar. The effective number of observations:
0020 %!    @math{n=\sum_{t=1}^T p_t}
0021 %! @item s
0022 %! Integer scalar. One plus the last time index with a missing value, so that @math{p_t=N} for all @math{t\geq s} (s=1 if nothing is missing).
0023 %! @item j
0024 %! cell array (1-by-N). Each element is a column vector targeting the non-NaN observations of a variable (filled only if requested).
0025 %! @end table
0026 %!
0027 %! @end deftypefn
0028 %@eod:
0029     
0030 % Copyright (C) 2008-2011 Dynare Team
0031 % stephane DOT adjemian AT univ DASH lemans DOT fr
0032 %
0033 % This file is part of Dynare.
0034 %
0035 % Dynare is free software: you can redistribute it and/or modify
0036 % it under the terms of the GNU General Public License as published by
0037 % the Free Software Foundation, either version 3 of the License, or
0038 % (at your option) any later version.
0039 %
0040 % Dynare is distributed in the hope that it will be useful,
0041 % but WITHOUT ANY WARRANTY; without even the implied warranty of
0042 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0043 % GNU General Public License for more details.
0044 %
0045 % You should have received a copy of the GNU General Public License
0046 % along with Dynare.  If not, see <http://www.gnu.org/licenses/>.
0047 
0048 % Logical map of the available (non-NaN) entries: rows are periods, columns are variables.
0049 observed = ~isnan(data);
0050 [T,N] = size(data);
0051 
0052 i = cell(1,T);
0053 j = cell(1,N);
0054 
0055 % For each period, list the indices of the observed variables.
0056 for obs=1:T
0057     tmp = find(observed(obs,:));
0058     if isempty(tmp)
0059         i(obs) = { [] }; % Nothing observed at this date: store a 0-by-0 empty.
0060     else
0061         i(obs) = { tmp(:) };
0062     end
0063 end
0064 
0065 % Effective number of observations (count of non-NaN entries).
0066 n = nnz(observed);
0067 
0068 % s is one past the last period with at least one missing value. When no value
0069 % is missing (including the case of an empty dataset, which would otherwise
0070 % trigger an out-of-bounds index) s is set to 1.
0071 s = find(any(~observed,2),1,'last')+1;
0072 if isempty(s)
0073     s = 1;
0074 end
0075 
0076 % The per-variable description is only built when the fourth output is requested,
0077 % otherwise j is returned as a 1-by-N cell array of empty cells.
0078 if nargout>3
0079     for v=1:N
0080         tmp = find(observed(:,v));
0081         j(v) = { tmp(:) };
0082     end
0083 end
0084 
0085 
0086 %@test:1
0087 %$ % Define a data set.
0088 %$ A = [ 1    1   ;   ...
0089 %$       1    NaN ;   ...
0090 %$       NaN  1   ;   ...
0091 %$       1    1   ;   ...
0092 %$       NaN  NaN ;   ...
0093 %$       1    NaN ;   ...
0094 %$       1    NaN ;   ...
0095 %$       1    1   ;   ...
0096 %$       1    1   ;   ...
0097 %$       1    1   ;   ...
0098 %$       1    1  ];
0099 %$
0100 %$ % Define expected results.
0101 %$ eB = cell(1,11);
0102 %$ eB(1)  = { transpose(1:2) };
0103 %$ eB(2)  = { 1 };
0104 %$ eB(3)  = { 2 };
0105 %$ eB(4)  = { transpose(1:2)};
0106 %$ eB(5)  = { [] };
0107 %$ eB(6)  = { 1 };
0108 %$ eB(7)  = { 1 };
0109 %$ eB(8)  = { transpose(1:2) };
0110 %$ eB(9)  = { transpose(1:2) };
0111 %$ eB(10) = { transpose(1:2) };
0112 %$ eB(11) = { transpose(1:2) };
0113 %$ eC = 16;
0114 %$ eD = 8;
0115 %$ eE = cell(1,2);
0116 %$ eE(1) = { [1; 2; 4; transpose(6:11)] };
0117 %$ eE(2) = { [1; 3; 4; transpose(8:11)] };
0118 %$
0119 %$ % Call the tested routine.
0120 %$ [B,C,D,E] = describe_missing_data(A);
0121 %$
0122 %$ % Check the results.
0123 %$ t(1) = dyn_assert(B,eB);
0124 %$ t(2) = dyn_assert(C,eC);
0125 %$ t(3) = dyn_assert(D,eD);
0126 %$ t(4) = dyn_assert(E,eE);
0127 %$ T = all(t);
0128 %@eof:1

Generated on Tue 22-May-2012 02:40:23 by m2html © 2005