function [studyinfo,study_status] = dsMonitorStudy(studyinfo,varargin)
%DSMONITORSTUDY - report the progress of a study and return its overall status.
%
% Usage:
%   [studyinfo,study_status] = dsMonitorStudy(studyinfo,'option',value,...)
%
% Inputs:
%   studyinfo: either a structure with a 'study_dir' field (in which case
%     studyinfo.study_dir is what gets handed to dsCheckStudyinfo) or any
%     other value, which is handed to dsCheckStudyinfo unchanged.
%   options (key/value pairs; all of varargin is also forwarded to
%   dsCheckStudyinfo):
%     'verbose_flag' : {0,1} (default: 1). 0 suppresses all printed output.
%     'process_id'   : (default: []) forwarded to dsCheckStudyinfo.
%
% Outputs:
%   studyinfo: the structure returned by dsCheckStudyinfo.
%   study_status:
%      1 : every simulation has status 'finished' (this is also the result
%          for an empty simulation list, because all() of an empty set is true)
%     -1 : not all finished and at least one simulation has a non-empty
%          error_log
%      0 : otherwise
%
% When verbose_flag is 1 the function prints per-host processing statistics,
% per-simulation error details, the paths stored in studyinfo, and a count
% of simulations per status.

% Check inputs
options=dsCheckOptions(varargin,{...
  'verbose_flag',1,{0,1},...
  'process_id',[],[],...
  },false);

if isstruct(studyinfo) && isfield(studyinfo,'study_dir')
  % hand the study directory (not the in-memory structure) to dsCheckStudyinfo
  studyinfo=dsCheckStudyinfo(studyinfo.study_dir,'process_id',options.process_id, varargin{:});
else
  % hand the provided value to dsCheckStudyinfo as is
  studyinfo=dsCheckStudyinfo(studyinfo,'process_id',options.process_id, varargin{:});
end

% Overall status of the study
if all(strcmp('finished',{studyinfo.simulations.status}))
  study_status=1;   % all simulations finished
elseif any(~cellfun(@isempty,{studyinfo.simulations.error_log}))
  study_status=-1;  % at least one simulation logged an error
else
  study_status=0;   % neither of the above
end

% Nothing is printed in quiet mode
if options.verbose_flag==0
  return;
end

fprintf('-------------------------------------------------------------\n');

% ---------------------------------------------------------------------
% Processing statistics, grouped by host
% ---------------------------------------------------------------------
% Only simulations with non-empty machine_info can be attributed to a host.
sims_with_host=find(~arrayfun(@(x)isempty(x.machine_info),studyinfo.simulations));
if ~isempty(sims_with_host)
  hosts=arrayfun(@(x)x.machine_info.host_name,studyinfo.simulations(sims_with_host),'uni',0);

  uniq_hosts=unique(hosts);
  num_hosts=length(uniq_hosts);

  num_simulations=zeros(1,num_hosts);
  num_finished=zeros(1,num_hosts);
  mean_duration=zeros(1,num_hosts);
  num_running=zeros(1,num_hosts);
  num_failed=zeros(1,num_hosts);
  num_cores=zeros(1,num_hosts);
  for i=1:num_hosts
    % simulations attributed to this host
    these_sims=sims_with_host(strcmp(uniq_hosts{i},hosts));
    num_simulations(i)=length(these_sims);

    % status strings of this host's simulations (collected once, reused below)
    host_status={studyinfo.simulations(these_sims).status};

    num_running(i)=nnz(strcmp('started',host_status));

    finished=strcmp('finished',host_status);
    num_finished(i)=nnz(finished);

    % mean duration is only defined once at least one simulation finished
    if num_finished(i)>0
      mean_duration(i)=mean([studyinfo.simulations(these_sims(finished)).duration]);
    else
      mean_duration(i)=nan;
    end

    num_failed(i)=nnz(strcmp('failed',host_status));

    % the core count is read from the host's first simulation; it is
    % reported as NaN when it cannot be read or stored as a scalar
    try
      num_cores(i)=studyinfo.simulations(these_sims(1)).machine_info.num_cores;
    catch
      num_cores(i)=nan;
    end
  end

  % list hosts by mean duration, in descending order
  [~,host_order]=sort(mean_duration,2,'descend');

  fprintf('Processing statistics (hosts sorted by mean compute time T):\n');
  for i=1:num_hosts
    index=host_order(i);
    fprintf(' @%s (%g cores)\n',uniq_hosts{index},num_cores(index));
    fprintf(' %g of %g sims finished (T: %gsec); %g failed; %g running.\n',...
      num_finished(index),num_simulations(index),mean_duration(index),...
      num_failed(index),num_running(index));
  end
end

% ---------------------------------------------------------------------
% Errors: one entry per simulation with a non-empty error_log
% ---------------------------------------------------------------------
errors={studyinfo.simulations.error_log};
error_inds=find(~cellfun(@isempty,errors));
if ~isempty(error_inds)
  fprintf('Errors:\n');
  for i=1:length(error_inds)
    siminfo=studyinfo.simulations(error_inds(i));
    % an error_log can be present on a simulation whose status is no
    % longer 'failed'; the heading reflects the current status
    if strcmp(siminfo.status,'finished')
      fprintf(' Simulation %g (error corrected, now %s):\n',siminfo.sim_id,siminfo.status);
    elseif ~strcmp(siminfo.status,'failed')
      fprintf(' Simulation %g (now re-%s):\n',siminfo.sim_id,siminfo.status);
    else
      fprintf(' Simulation %g (%s):\n',siminfo.sim_id,siminfo.status);
    end
    try
      fprintf(' Host name: %s\n',siminfo.machine_info.host_name);
    catch
      % best effort: machine_info can be empty, in which case there is no
      % host name to report and the line is skipped
    end
    fprintf(' Start time: %s\n',siminfo.start_time);
    fprintf(' Error log: %s\n',siminfo.error_log);
  end
end

% ---------------------------------------------------------------------
% Paths
% ---------------------------------------------------------------------
fprintf('Paths:\n');
fprintf(' Study directory: %s\n',studyinfo.study_dir);
if ~isempty(studyinfo.paths)
  if isfield(studyinfo.paths,'mechanisms') && iscell(studyinfo.paths.mechanisms)
    if length(studyinfo.paths.mechanisms)==1
      fprintf(' Model files: %s\n',studyinfo.paths.mechanisms{1});
    else
      fprintf(' Model files:\n');
      for i=1:length(studyinfo.paths.mechanisms)
        fprintf(' %s\n',studyinfo.paths.mechanisms{i});
      end
    end
  end
  if isfield(studyinfo.paths,'dynasim_functions')
    fprintf(' DynaSim functions: %s\n',studyinfo.paths.dynasim_functions);
  end
  if isfield(studyinfo.paths,'batch_dir')
    fprintf(' Batch directory: %s\n',studyinfo.paths.batch_dir);
  end
end

% ---------------------------------------------------------------------
% Status summary: number of simulations per distinct status
% ---------------------------------------------------------------------
status={studyinfo.simulations.status};
uniq_status=unique(status);

fprintf('Simulation status summary:\n');
for i=1:length(uniq_status)
  count=nnz(strcmp(uniq_status{i},status));
  fprintf(' %g %s\n',count,uniq_status{i});
end

% Banner when every simulation shares the same status. The non-empty guard
% matters: all() of an empty set is true, so without it an empty simulation
% list would print all four (mutually contradictory) banners.
if ~isempty(status)
  if all(strcmp('finished',status))
    fprintf('**** ALL SIMULATIONS HAVE FINISHED ****\n');
  elseif all(strcmp('initialized',status))
    fprintf('**** NO SIMULATIONS HAVE STARTED ****\n');
  elseif all(strcmp('started',status))
    fprintf('**** ALL SIMULATIONS ARE RUNNING ****\n');
  elseif all(strcmp('failed',status))
    fprintf('**** ALL SIMULATIONS FAILED ****\n');
  end
end

fprintf('-------------------------------------------------------------\n');