%% tetR data correction for Vipul Singhal's Thesis
% Perform Calibration-Correction on the constitutive GFP data and tetR
% repression data. 
%
% MODELS: 
% Characterization model:
% D_G + E <-> D_G:E -> D_G + E + G 
%
% Calibration model:
% D_T + E <-> D_T:E -> D_T + E + T
% D_G + E <-> D_G:E -> D_G + E + G
% 2 T <-> T2
% D_G + T2 <-> D_G:T2
%
% (c) Vipul Singhal, Caltech 2018





close all
clear all

%% Initialize path and set simulation options

% Add the folder that contains this script, and all of its subfolders, to
% the MATLAB path so that the files needed to run the simulations are found.
% fileparts is used (rather than searching the path string for '/') so that
% the folder is also resolved correctly when the file separator is '\'.
st = dbstack('-completenames');
if isempty(st)
    % dbstack returns an empty struct when this code is not executing from
    % a file (e.g. run as an editor section); fall back to the current folder.
    projdir = pwd;
else
    fp = st(1).file;
    projdir = fileparts(fp);
end
addpath(genpath(projdir));

% Use saved results to generate plots, or redo the estimations.
% NOTE(review): this flag is not read anywhere in the visible part of the
% script; the estimations below are always re-run.
usesaved = true;

% Use parallel computing? true or false. (The variable name keeps its
% original spelling because it is referenced further down the script.)
parallalflag = false;

% How many steps to run the mcmc simulation for
nsteps = 2e5;

% number of walkers to use
nW = 600;

% step size for the mcmc
stepsz = 3;

% parameter lower and upper bounds in log space. (all parameters)
lb =-8; ub = 8;

% mcmc noise model standard deviation and tightening. The likelihood
% functions below are given stdev/tightening as their noise scale.
stdev = 1;
tightening = 2;

%% Collect experimental data
% Two project import functions provide the data arrays and dose values.
% The first output of the constitutive import is discarded; the time vector
% used throughout the script is the one returned by the tetR import.
[~, calibration_data, dosevals_calib] = import_ptetconstitutive();
[tvec, correction_data, dosevals_corr] = import_tetR_repression();

%% Set up calibration step model

%%%%%%%%%% CALIBRATION STEP MODEL %%%%%%%%%%%%
% Handle mapping (parameter point, initial species concentrations, time
% points) to a simulation: it forwards to ode15s with constitutive_gfp3 as
% the right-hand side, evaluated at the given parameter vector logp.
model_calib = @(logp, sp0, tspan) ode15s( ...
    @(t, sp) constitutive_gfp3(t, sp, logp), tspan, sp0);

% Index bookkeeping. Each vector below holds positions in the 'logp' or
% 'sp0' arguments of model_calib:
%   espix  - extract specific parameters (ESP), positions in logp
%   cspix  - circuit specific parameters (CSP), positions in logp
%   esspix - extract specific species initial concentrations (ESSP),
%            positions in sp0

% ESP: the kc rate parameter of constitutive_gfp3
espix = 1;

% CSP: the kfP and krP rate parameters
cspix = 2:3;

% ESSP: the enzyme initial concentration (species "P" in the model)
esspix = 2;

% Bundle the index vectors; note the order is {ESP, ESSP, CSP}.
pmap_calib = {espix, esspix, cspix};

% Number of extracts, read from the 4th dimension of the data array
% (3 for this data set, per the original author's note).
nEnv = size(calibration_data, 4);

% Length of the joint parameter vector logpjoint (called the masterVector
% in the mcmc_simbio toolbox, which this code predates and inspired).
% Every extract contributes its own ESPs and ESSPs; the CSPs appear once
% and are shared across extracts:
%
% logpjoint = [E1 ESPs, E1 ESSPs, E2 ESPs, E2 ESSPs, E3 ESPs, E3 ESSPs, CSPs]
nparam_calib = nEnv*(numel(espix) + numel(esspix)) + numel(cspix);

% How logpjoint is intended to be distributed to the per-extract models
% (the distribution itself happens inside log_likelihood_sharedCSP, which
% is not part of this file). For the circuit in extract E1:
%
%   logp(espix) = logpjoint(1:length(espix));
%
%   total_non_CSPs = nEnv*(length(espix) + length(esspix));
%   logp(cspix) = logpjoint(total_non_CSPs+1 : nparam_calib);
%
%   startESSP1 = length(espix) + 1;
%   endESSP1   = length(espix) + length(esspix);
%   sp0(esspix) = logpjoint(startESSP1:endESSP1);
%
% The remaining extracts follow the same pattern with their own ESP/ESSP
% slots. All other initial species concentrations are either set by the
% dosing (below) or are set to 0.

% Species to be dosed and species to be measured.

% Total number of species in the model
nSp_calib = 4;

% Position in sp0 of the dosed species: the 1st species is the GFP DNA,
% D_G, whose initial concentration is set by the dose.
dosemap_calib = 1;

% Position in sp0 of the species compared against experimental data:
% the GFP species, G, is the 4th species in the model.
idMS_calib = 4;

%% Define likelihood and prior functions for calibration step
%
% Elementwise score of a residual 'res' for a noise scale 'sig'.
% NOTE(review): a Gaussian log-density would read
%     -0.5*(res./sig).^2 - log(sqrt(2*pi).*sig)
% whereas this expression has no factor 0.5 and multiplies by sig instead
% of taking its log. It is kept exactly as is so that results do not
% change - confirm whether this is intended.
lognormvec = @(res, sig) -((res ./ sig) .^ 2) - sig .* log(sqrt(2*pi));

% Log likelihood of a joint parameter point for the calibration model.
% Only the first 13 time points are used, and the data are multiplied by
% 1000 before being passed on (presumably a unit conversion - confirm
% against the import function). The noise scale is stdev/tightening.
loglike_calib = @(logpjoint) log_likelihood_sharedCSP( ...
    model_calib, ...
    nSp_calib, ...
    logpjoint, ...
    pmap_calib, ...
    tvec(1:13), ...
    1000*calibration_data(1:13,:,:,:), ...
    dosevals_calib, ...
    dosemap_calib, ...
    idMS_calib, ...
    lognormvec, ...
    stdev/tightening);

% Prior: true only when every parameter lies strictly inside (lb, ub).
logprior = @(logp) all(logp > lb) && all(logp < ub);
%% Perform the calibration step Bayesian inference
% All the ESPs and CSPs in the calibration models get estimated, with the CSPs
% shared across the models.


% Initialize the walkers in a latin hypercube spanning the parameter range.
% The width of the range is ub - lb (the previous abs(lb)+abs(ub) is only
% equal to this when lb <= 0 <= ub, as is the case for the bounds above).
mdpts = (lb+ub)/2;
width = ub - lb;
lhsamp = width*(lhsdesign(nW, nparam_calib)-0.5);
% lhsdesign returns one row per walker; transpose so that each column of
% minit is one walker's initial parameter point.
minit = bsxfun(@plus, mdpts, lhsamp');


% run the initial burn in simulation
tic
[m, lPburnin] = gwmcmc_vse(minit, {logprior loglike_calib}, nsteps, ...
    'StepSize', stepsz, ...
    'ThinChain', 20, 'Parallel', parallalflag);
toc
% Time stamp used in the names of every file saved by this run.
datestring = datestr(now, 'yyyymmdd_HHMMSS');


% Run the actual MCMC simulation. We break up the mcmc into 10 iterations,
% where each iteration uses nsteps number of model simulation evaluations.
% Each iteration uses the last set of positions of the walkers as initial
% points, so that the 10 iterations form a continuous mcmc run.
for i = 1:10
    % last slice of the chain along its third dimension
    minit = m(:,:,end);
    clear m

    tic
    [m, lp] = gwmcmc_vse(minit, {logprior loglike_calib}, nsteps, ...
        'ThinChain', 20, ...
        'Parallel', parallalflag, 'StepSize', stepsz);
    toc

    % Save the whole workspace in a .mat file named after the datestring of
    % the simulation run and the iteration number.
    save(['t015_calib_' datestring '_' num2str(i) '_MBP']);
end


%% Plot the calibration step results

%% Set up correction step 1 model
%%%%%%%%%%% TEST (CORRECTION STEP) MODEL %%%%%%%%%%%%%
% Same calling convention as the calibration model handle, but with
% tetR_repression as the ODE right-hand side.
model_corr = @(logp, sp0, tspan) ode15s( ...
    @(t, sp) tetR_repression(t, sp, logp), tspan, sp0);

% Index vectors for the correction model. These overwrite the
% calibration-step values of espix, cspix, esspix and nEnv.
espix = 1;      % ESP position in logp
cspix = 2:9;    % CSP positions in logp
esspix = 2;     % ESSP position in sp0
nEnv = size(correction_data, 4);

% Length of logpjoint for the full (all-extract) correction model.
% NOTE(review): nparam_corr is not used again in the visible script.
nparam_corr = nEnv*(numel(espix) + numel(esspix)) + numel(cspix);

pmap_corr = {espix, esspix, cspix};
nSp_corr = 9;           % total number of species in the model
dosemap_corr = [6, 1];  % positions in sp0 of the dosed species
idMS_corr = 9;          % position in sp0 of the measured species
%% Define likelihood and prior functions for correction step 1
% Same residual score as in the calibration step.
% NOTE(review): this is not the Gaussian log-density
%     -0.5*(res./sig).^2 - log(sqrt(2*pi).*sig)
% It is kept exactly as is so that results do not change - confirm intent.
lognormvec = @(res, sig) -((res ./ sig) .^ 2) - sig .* log(sqrt(2*pi));

% Log likelihood of a joint parameter point over all extracts, using the
% first 13 time points and the data multiplied by 1000.
% NOTE(review): the inference in the visible script uses loglike_corr1,
% not this handle.
loglike_corr = @(logpjoint) log_likelihood_sharedCSP( ...
    model_corr, ...
    nSp_corr, ...
    logpjoint, ...
    pmap_corr, ...
    tvec(1:13), ...
    1000*correction_data(1:13,:,:,:), ...
    dosevals_corr, ...
    dosemap_corr, ...
    idMS_corr, ...
    lognormvec, ...
    stdev/tightening);

% Prior: true only when every parameter lies strictly inside (lb, ub).
logprior = @(logp) all(logp > lb) && all(logp < ub);

%% Perform the correction step 1 Bayesian inference
% ie, fix the ESP and the ESSP in the correction model and estimate the CSP
% for the correction model
% we do the estimation using data and ESP parameters from extract number 3, eSG.

% Reload the chain saved by the 10th calibration iteration of this run.
load(['t015_calib_' datestring '_10_MBP'], 'm');
% Collapse the walker and step dimensions: after the transpose each row is
% one sample and each column one entry of logpjoint, so median (which works
% down the columns) returns one value per parameter.
mstacked = m(:,:)';
medn = median(mstacked);

% !!! can replace medians here with an arbitrary point !!!
% With one ESP and one ESSP per extract, entries 5 and 6 of logpjoint are
% the ESP and ESSP of the third extract.
kc_calib = medn(5); %use medians
P_calib = medn(6); %use medians

% the correction step 1 model involves setting the ESPs for the third
% extract to values obtained from the calibration step, and estimating the
% CSPs. The sampled vector logp_corrcsp is stacked below the two fixed
% values, so it must be a column vector.
loglike_corr1 = @(logp_corrcsp) log_likelihood_sharedCSP(model_corr,...
    nSp_corr, [kc_calib; P_calib; logp_corrcsp], pmap_corr, tvec(1:13), ...
    1000*correction_data(1:13,:,:,3), dosevals_corr, dosemap_corr, idMS_corr,...
    lognormvec, stdev/tightening);


% there are 8 parameters in the first correction step: the 8 CSPs in the
% model model_corr (which actually calls tetR_repression.m)
nparam_corr1 = numel(cspix);
lhsamp = width*(lhsdesign(nW, nparam_corr1)-0.5);
% lhsdesign returns one row per walker; transpose (as in the calibration
% step) so that each column of minit is one walker's parameter point.
% Without the transpose minit is nW-by-8 instead of 8-by-nW.
minit = bsxfun(@plus, mdpts, lhsamp');

% burn in phase (uses the step size and step count set at the top of the
% script; both are changed below for the main run)
tic
[m, lP] = gwmcmc_vse(minit, {logprior loglike_corr1}, nsteps, ...
    'StepSize', stepsz, ...
    'ThinChain', 10, 'Parallel', parallalflag);
toc

% nIter must be defined before it is used to size corr_SD.
nIter = 10;
corr_SD = cell(nIter,1);   % names of the files saved by each iteration
stepsz = 0.95;
nsteps = 1e5;

% Run the actual MCMC simulation. Each iteration starts from the last set
% of walker positions of the previous one.
for i = 1:nIter
    minit = m(:,:,end);
    clear m

    tic
    [m, lp] = gwmcmc_vse(minit, {logprior loglike_corr1}, nsteps, ...
        'ThinChain', 10, ...
        'Parallel', parallalflag, 'StepSize', stepsz);
    toc

    % Save the whole workspace and remember the file name.
    svstr = ['t015_corr1_' datestring '_' num2str(i) '_MBP'];
    save(svstr);
    corr_SD{i} = svstr;
end