Home > functions > internal > dsStudyinfoIO.m

dsStudyinfoIO

PURPOSE ^

STUDYINFOIO - use lock files to manage concurrent access to a shared studyinfo

SYNOPSIS ^

function studyinfo = dsStudyinfoIO(studyinfo,study_file,id,verbose_flag)

DESCRIPTION ^

STUDYINFOIO - use lock files to manage concurrent access to a shared studyinfo

 This is an internal helper function called by dsCheckStudyinfo, dsSetupStudy,
 TrackStudy, and dsCreateBatch to prevent busy-file conflicts, i.e., to
 serialize reads/writes of the studyinfo file by parallel processes in a study batch.

 Usage:
   loading: studyinfo=dsStudyinfoIO([],study_file,[id,verbose_flag])
   saving:  dsStudyinfoIO(studyinfo,[study_file,id,verbose_flag]);

 Inputs:
   - studyinfo: (empty [] for loading) or (DynaSim studyinfo structure to save)
   - study_file: name of file to load or save
   - id: process identifier for lock file name [optional]

 Author: Jason Sherfey, PhD <jssherfey@gmail.com>
 Copyright (C) 2016 Jason Sherfey, Boston University, USA

CROSS-REFERENCE INFORMATION ^

This function calls: This function is called by:

SUBFUNCTIONS ^

SOURCE CODE ^

0001 function studyinfo = dsStudyinfoIO(studyinfo,study_file,id,verbose_flag)
0002 %DSSTUDYINFOIO - use lock files to manage concurrent access to a shared studyinfo
0003 %
0004 % Internal helper called by dsCheckStudyinfo, dsSetupStudy, TrackStudy, and dsCreateBatch to prevent
0005 % busy-file conflicts, i.e., to serialize reads/writes by parallel processes in a study batch.
0006 %
0007 % Usage (returns the studyinfo structure that was loaded, saved, or updated):
0008 %   loading: studyinfo=dsStudyinfoIO([],study_file,[id,verbose_flag])
0009 %   saving:  dsStudyinfoIO(studyinfo,[study_file,id,verbose_flag]);
0010 %
0011 % Inputs:
0012 %   - studyinfo: [] to load; else studyinfo structure to save (or simulations substructure with field sim_id to merge into the file)
0013 %   - study_file: name of file (or directory containing studyinfo.mat) to load or save [default: 'studyinfo.mat']
0014 %   - id: process identifier for lock file name [optional; default is derived from existing lock files]
0015 %   - verbose_flag: whether to print progress messages [optional; default: 0]
0016 %
0017 % Author: Jason Sherfey, PhD <jssherfey@gmail.com>
0018 % Copyright (C) 2016 Jason Sherfey, Boston University, USA
0019 
0020 % check inputs
0021 if nargin<4, verbose_flag=0; end
0022 if nargin<3, id=[]; end
0023 if nargin<2 || isempty(study_file)
0024   study_file='studyinfo.mat';
0025 elseif isdir(study_file)
0026   study_file=fullfile(study_file,'studyinfo.mat');
0027 end
0028 study_dir=fileparts2(study_file);
0029 if nargin<1, studyinfo=[]; end
0030 
0031 % determine operating system
0032 [~,OS]=system('uname');
0033 OS=lower(strtrim(OS)); % operating system (uname: 'Linux', 'Darwin' (Mac), error (Windows))
0034 if length(OS)>7
0035   % remove dump (occurs randomly for some reason, with low frequency)
0036   OS=strtrim(OS(end-6:end));
0037 end
0038 
0039 %% prepare action-specific parameters for accessing studyinfo
0040 
0041 if isempty(id)
0042   % extract process IDs from names of all current lock files
0043   curr_ids=[];
0044   switch OS
0045     case {'linux','darwin'} % Linux or Mac
0046       % lock_file format: .lock_<timestamp>_<id> (fullfile keeps an empty study_dir from turning into '/')
0047       [status,result]=system(['ls ' fullfile(study_dir,'.lock_*') ' 2>/dev/null']);
0048       if status==0
0049         ids=regexp(result,'\.lock_\d+_(\d+)','tokens');
0050         if ~isempty(ids), curr_ids=cellstr2num([ids{:}]); end
0051       end
0052     otherwise % Windows
0053       % lock_file format: lock_<timestamp>_<id>
0054       D=dir(study_dir);
0055       status=~any(find(~cellfun(@isempty,regexp({D.name},'^lock_'))));
0056       if status==0
0057         ids=regexp({D.name},'lock_\d+_(\d+)','tokens','once');
0058         if ~isempty(ids), curr_ids=cellstr2num([ids{:}]); end
0059       end
0060   end
0061 end
0062 
0063 MIN_LOAD_ID=1e7; % 10M
0064   % should be set to a number larger than the max number of sims or analyses expected in a batch
0065   % note: this gives priority to loading over saving
0066   % (since NextStudyinfoID = max existing lock id with min timestamp)
0067 
0068 % determine proper settings based on inputs (whether studyinfo struct was
0069 % provided to be saved or not)
0070 if isempty(studyinfo)
0071   % "Load Study" settings
0072   action='load';
0073   if isempty(id)
0074     % get id from max id of existing locks with id>=MIN_LOAD_ID else id=MIN_LOAD_ID
0075     if ~isempty(curr_ids) && any(curr_ids>=MIN_LOAD_ID)
0076       id=max(curr_ids)+1;
0077     else
0078       id=MIN_LOAD_ID; % value greater than the max # of batch processes (i.e., greater than the max process ID)
0079     end
0080   end
0081   if ~exist(study_file,'file')
0082     error('studyinfo.mat file not found: %s',study_file);
0083   end
0084 else
0085   % "Save Study" settings
0086   action='save';
0087   if isempty(id)
0088     % get id from max id of existing locks else 0
0089     if ~isempty(curr_ids) && any(curr_ids<MIN_LOAD_ID) && ismember(0,curr_ids)
0090       id=max(curr_ids)+1;
0091     else
0092       id=0; % note: batch process IDs start at id=1
0093     end
0094   end
0095 end
0096 
0097 %% create lock file for this process (id): lock_<timestamp>_<id>
0098 timestamp=datestr(now,'yyyymmddHHMMSSFFF'); % millisecond precision
0099 % --------------------------------------------
0100 switch OS
0101   case {'linux','darwin'} % Linux or Mac
0102     lock_file=fullfile(study_dir,sprintf('.lock_%s_%i',timestamp,id));
0103     [s,r]=system(['touch ' lock_file]);
0104     if s, error(r); end
0105     common_lock_file=fullfile(study_dir,'.locked');
0106   otherwise % Windows
0107     lock_file=fullfile(study_dir,sprintf('lock_%s_%i',timestamp,id));
0108     fid=fopen(lock_file,'w');
0109     fclose(fid);
0110     common_lock_file=fullfile(study_dir,'locked');
0111 end
0112 % --------------------------------------------
0113 if verbose_flag
0114   fprintf('created temporary lock file for this process: %s\n',lock_file);
0115 end
0116 
0117 % pause to allow lock files of simultaneous processes to appear
0118 % pause(.01); % wait 10ms
0119 
0120 try
0121 
0122 %% perform action (load or save) for this process when its ID is the Next ID
0123 timeout=30; % seconds, total time to wait before failing to access studyinfo
0124 delay=0.001; % seconds, time to pause between attempts to access studyinfo
0125 max_num_timeouts=50; % # timeouts before giving up
0126   % note: each failed attempt may remove <=1 stale lock file blocking this process
0127 cnt=1; % attempt counter
0128 done=0; % {0,1} whether the action has completed successfully
0129 while ~done
0130   % try accessing studyinfo file and remove stale lock file if necessary after timeout
0131   for idx=1:(timeout/delay)
0132     next_id=NextStudyinfoID(study_dir,OS);
0133     % check if it's time for this process to perform its action
0134     if (id==next_id) && ~exist(common_lock_file,'file')
0135       % create common lock
0136       switch OS
0137         case {'linux','darwin'} % Linux or Mac
0138           [s,r]=system(['touch ' common_lock_file]);
0139           if s, error(r); end
0140         otherwise
0141           fid=fopen(common_lock_file,'w');
0142           fclose(fid);
0143       end
0144       try
0145         switch action
0146           case 'load'
0147             % load study_file
0148             if verbose_flag
0149               fprintf('loading study file: %s\n',study_file);
0150             end
0151             studyinfo=getfield(load(study_file,'studyinfo'),'studyinfo');
0152           case 'save'
0153             if isfield(studyinfo,'sim_id')
0154               % input is actually an updated simulation metadata substructure
0155               simulations=studyinfo;
0156               % load studyinfo from disk
0157               studyinfo=getfield(load(study_file,'studyinfo'),'studyinfo');
0158               % update simulation metadata
0159               for sim=1:length(simulations)
0160                 ix=[studyinfo.simulations.sim_id]==simulations(sim).sim_id;
0161                 studyinfo.simulations(ix)=simulations(sim);
0162               end
0163               if verbose_flag
0164                 fprintf('updating simulation metadata in study file: %s\n',study_file);
0165               end
0166             else
0167               if verbose_flag
0168                 fprintf('saving study file: %s\n',study_file);
0169               end
0170             end
0171             % save study_file
0172             try
0173               save(study_file,'studyinfo','-v7');
0174               if ~strcmp(reportUI,'matlab')
0175                 wrn_msg = lastwarn; % outside MATLAB this failure only surfaces as a warning
0176                 if strcmp(wrn_msg,'save: wrong type argument ''function handle''')
0177                   error('save: wrong type argument ''function handle''');
0178                 end
0179               end
0180             catch
0181               fprintf('Data is not ''-v7'' compatible. Saving in hdf5 format.\n')
0182               save(study_file,'studyinfo','-hdf5');
0183             end
0184         end
0185         done=1; break;
0186       catch
0187         if verbose_flag
0188           fprintf('failed to %s study file: %s\n',action,study_file);
0189         end
0190         pause(delay); % wait
0191       end
0192     else
0193       pause(delay); % wait
0194     end
0195     % check if next_id is unchanged (i.e., the same lock file continues
0196     % to block this process)
0197     if idx==1
0198       is_unchanged=1;
0199     else
0200       is_unchanged = is_unchanged && (next_id==last_next_id);
0201     end
0202     last_next_id=next_id;
0203   end
0204   % if timed out (and not done) and next_id has stayed the same: remove next_id lock
0205   if ~done && idx==(timeout/delay) && is_unchanged
0206     % remove lock on next_id (that process may have failed before removing
0207     % its lock file)
0208     D=dir(study_dir); % contents of study_dir directory
0209     pat=sprintf('^\\.?lock_\\d+_%i$',last_next_id);
0210     ind=find(~cellfun(@isempty,regexp({D.name},pat)));
0211     if ~isempty(ind)
0212       next_lock_file=fullfile(study_dir,D(ind(1)).name); % one stale lock per timeout, addressed inside study_dir (not the current folder)
0213       if verbose_flag
0214         fprintf('deleting stale temporary lock file: %s\n',next_lock_file);
0215       end
0216       delete(next_lock_file);
0217       delete(common_lock_file);
0218     end
0219   end
0220   if ~done
0221     if verbose_flag
0222       fprintf('TIMEOUT #%g while waiting to %s study file for process %g (next_id=%g).\n',cnt,action,id,next_id);
0223     end
0224     cnt=cnt+1;
0225   end
0226   % check if max attempts has been exceeded
0227   if cnt>max_num_timeouts
0228     % delete this process's lock file and give up on action
0229     if verbose_flag
0230       fprintf('deleting temporary lock file for this process: %s\n',lock_file);
0231     end
0232     delete(lock_file);
0233     delete(common_lock_file);
0234     error('failed to access studyinfo file after %g timeouts.',max_num_timeouts);
0235   end
0236 end
0237 % remove temporary lock for this process
0238 if verbose_flag
0239   fprintf('deleting temporary lock file for this process: %s\n',lock_file);
0240 end
0241 delete(lock_file);
0242 delete(common_lock_file);
0243 
0244 catch err
0245   if verbose_flag
0246     fprintf('deleting temporary lock file for this process: %s\n',lock_file);
0247   end
0248   delete(lock_file);
0249   delete(common_lock_file);
0250   displayError(err);
0251 end
0253 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0254 %% SUBFUNCTIONS
0255 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
0256 function id=NextStudyinfoID(study_dir,OS)
0257 % purpose: determine the id of the process whose turn it is to access studyinfo.mat, based on
0258 % the lock files present in study_dir (.lock_<timestamp>_<id> or lock_<timestamp>_<id>).
0259 % Linux/Mac: max id over all lock files. Windows: max id among the locks with the earliest timestamp.
0260 % Returns id=0 if no lock files are found. OS: lowercased 'uname' output (anything else is treated as Windows).
0261 id=0; % next process id
0262 switch OS
0263   case {'linux','darwin'} % Linux or Mac
0264     % check if there are any lock files
0265     [status,result]=system(['ls ' fullfile(study_dir,'.lock_* 2>/dev/null')]);
0266     if status==0 % there exist lock files
0267       % get list of locked ids
0268       ids=regexp(result,'\.lock_\d+_(\d+)','tokens');
0269       if ~isempty(ids)
0270         % identify the max id
0271         ids=[ids{:}];
0272         id=max(cellstr2num(ids));
0273       end
0274 %       % get list of timestamps in lock file names
0275 %       timestamps=regexp(result,'.lock_(\d+)_\d+','tokens');
0276 %       if ~isempty(timestamps)
0277 %         % identify the next timestamp to process
0278 %         timestamps=[timestamps{:}];
0279 %         x=cellstr2num(timestamps);
0280 %         timestamp=timestamps{x==min(x)};
0281 %         % get list of locked ids with that timestamp
0282 %         ids=regexp(result,sprintf('.lock_%s_(\\d+)',timestamp),'tokens');
0283 %         % get max id from lock with min timestamp
0284 %         id=max(cellstr2num([ids{:}]));
0285 %       end
0286     end
0287   otherwise % Windows
0288     D=dir(study_dir);
0289     status=~any(find(~cellfun(@isempty,regexp({D.name},'^lock_'))));
0290     if status==0 % there exist lock files
0291       % get list of timestamps in lock file names
0292       timestamps=regexp({D.name},'lock_(\d+)_\d+','tokens','once');
0293       if ~isempty(timestamps)
0294         % identify the next timestamp to process
0295         timestamps=[timestamps{:}];
0296         if isempty(timestamps), return; end
0297         timestamps=sort(timestamps); % fixed-width digit strings: lexicographic order is chronological and exact
0298         timestamp=timestamps{1}; % earliest timestamp; a single value even when several locks share it
0299         % get list of locked ids with that timestamp
0300         ids=regexp({D.name},sprintf('lock_%s_(\\d+)',timestamp),'tokens','once');
0301         % get max id from lock with min timestamp
0302         id=max(cellstr2num([ids{:}]));
0303       end
0304     end
0305 end
0306 
0307 %% wait until there are no lock files from other processes (or timeout)
0308 % NOTE: no longer necessary since adding timestamp to lock file name...
0309 % todo: remove this section after extensive testing (do under version
0310 % control so that the code remains on record)
0311 %{
0312 timeout=30*5; % seconds
0313 delay=0.01; % seconds
0314 for idx=1:(5*timeout/delay) % timeout after 5*timeout sec (then clear all lock files if timed out)
0315     % note: time-out at this step should be longer than below to allow for
0316     % removal of stale lock files by other processes currently attempting access.
0317   % check if there exist any files named .lock_*
0318   % --------------------------------------------
0319   switch OS
0320     case {'linux','darwin'} % Linux or Mac
0321       % lock_file format: .lock_<timestamp>_<id>
0322       [status,~]=system(['ls ' study_dir '/.lock_* 2>/dev/null']); % note: ls is faster than dir
0323     otherwise % Windows
0324       % lock_file format: lock_<timestamp>_<id>
0325       D=dir(study_dir);
0326       status=~any(find(~cellfun(@isempty,regexp({D.name},'^lock_'))));
0327   end
0328   % --------------------------------------------
0329   if status==0 % there exists a file .lock_*
0330     % note: {.lock_*} are temporary files created to indicate periods during
0331     % which studyinfo.mat is being accessed. studyinfo.mat should not be
0332     % loaded until all .lock_* files have been removed.
0333     pause(delay); % wait
0334   else
0335     break;
0336   end
0337 end
0338 % if timed out: delete all lock files blocking this process
0339 if idx==(timeout/delay)
0340   if verbose_flag
0341     fprintf('deleting all temporary lock files blocking this process...\n');
0342   end
0343   D=dir(study_dir); % contents of study_dir directory
0344   inds=find(~cellfun(@isempty,regexp({D.name},'^.?lock_')));
0345   % delete all lock files
0346   for i=1:length(inds)
0347     file=fullfile(study_dir,D(inds(i)).name);
0348     if verbose_flag
0349       fprintf('\t%s\n',file);
0350     end
0351     delete(file);
0352   end
0353 end
0354 %}

Generated on Tue 12-Dec-2017 11:32:10 by m2html © 2005