Contents

Calibration - Correction demo on tetR repression data

(c) Vipul Singhal, California Institute of Technology, 2018

% % clean up things
clear all
close all
clc


%
st = dbstack('-completenames');
fp = st(1).file;
slashes = regexp(fp, '/');
projdir = fp(1:slashes(end)-1);
addpath(genpath(projdir));

Visualize the MCMC Chains and posterior distributions

Setup an array of all the .mat files where the calibration data posterior distributions are stored.

calib_SD = {'t015_calib_20171023_151627_1_MBP'
    't015_calib_20171023_151627_2_MBP'
    't015_calib_20171023_151627_3_MBP'
    't015_calib_20171023_151627_4_MBP'
    't015_calib_20171023_151627_5_MBP'
    't015_calib_20171023_151627_6_MBP'
    't015_calib_20171023_151627_7_MBP'
    't015_calib_20171023_151627_8_MBP'
    't015_calib_20171023_151627_9_MBP'
    't015_calib_20171023_151627_10_MBP'
    't015_calib_20171023_151627_11_MBP'
    't015_calib_20171023_151627_12_MBP'
    't015_calib_20171023_151627_13_MBP'
    't015_calib_20171023_151627_14_MBP'
    't015_calib_20171023_151627_15_MBP'
    't015_calib_20171023_151627_16_MBP'
    't015_calib_20171023_151627_17_MBP'
    't015_calib_20171023_151627_18_MBP'
    't015_calib_20171023_151627_19_MBP'
    't015_calib_20171023_151627_20_MBP'};

% Number of walkers used in the ensemble MCMC:
nW = 600;

% parameter legends for plotting parameter posterior distributions later
legends = {'kc1'    'P1'  'kc2'    'P2'   'kc3'    'P3', 'kfP', 'krP'};

% Concatenate the parameter arrays drawn from the .mat files.
mcat = catMC(calib_SD);

% Plot the MCMC chains (for 1/10 of the walkers for easy vidualization)
plotChains(mcat(:,1:10:end,:), nW, legends );
 calib_SD_converged = {...
    't015_calib_20171023_151627_13_MBP'
    't015_calib_20171023_151627_14_MBP'
    't015_calib_20171023_151627_15_MBP'
    't015_calib_20171023_151627_16_MBP'
    't015_calib_20171023_151627_17_MBP'
    't015_calib_20171023_151627_18_MBP'
    't015_calib_20171023_151627_19_MBP'
    't015_calib_20171023_151627_20_MBP'};

mcat_converged_calib = catMC(calib_SD_converged);

Plot pairwise projections of the joint posterior distribution.

figure
ecornerplot_vse(mcat_converged_calib,'scatter', true,'transparency',0.025,...
    'color',[.6 .35 .3], 'names', legends);

Visualize the resulting 'fits' of the model to the data

Overall idea: Pick 500 points from the parameter posterior distribution, generate trajectories, then take means, medians, standard deviations etc.

m_rearranged_calib = mcat_converged_calib(:,:)';
nptstotal = size(m_rearranged_calib, 1);
npts = 500;
paramid = randperm(nptstotal, npts);
params_to_use_calib = m_rearranged_calib(paramid, :);
medians_converged_calib = median(m_rearranged_calib);
envname = {'VS', 'MP', 'SG'};
% initialize things for the simulation and plotting.
load('t015_calib_20171023_151627_20_MBP', 'tvec', 'nW', 'model_calib',...
    'dosevals_calib','dosemap_calib', 'calibration_data', 'pmap_calib',...
    'nSp_calib', 'idMS_calib' )

nMS = size(calibration_data, 2); % nMS = 1 here, since only GFP is measured
nICs = size(dosevals_calib,2); % ICs, GFP DNA = [1 2 5 10 20] (nM)

nEnv = size(calibration_data, 4);

espIX = pmap_calib{1};
esspIX = pmap_calib{2};
cspIX = pmap_calib{3};

nESP = length(espIX); % the ESP indices in the model (not in logpjoint)
nESSP = length(esspIX);% the Env specific species indices in the model (not in logpjoint)
nCSP = length(cspIX); % the CSP indices in the model (not in logpjoint)

icvec = zeros(nSp_calib, 1);


simulatedtraj = zeros(length(tvec(1:13)),nMS, nICs , npts, nEnv);
maxGFP_sd = 0;

simulate the calibration model for all the randomly picked points from the posterior distribution

for kk = 1:npts
    logpjoint = params_to_use_calib(kk, :);
    cspindices = ((nESSP + nESP)*nEnv+1):length(logpjoint);
    paramvec = zeros(nESP+nCSP, 1);
    logpcsp = logpjoint(cspindices);
    paramvec(cspIX) = logpcsp;

    for envid = 1:nEnv
        espindices = (envid-1)*(nESP+nESSP) + (1:nESP);
        logpesp = logpjoint(espindices);
        paramvec(espIX) = logpesp;
        esspindices = ((envid-1)*(nESP+nESSP) + nESP + 1):envid*(nESP+nESSP);

        % set the values of the initial condition vector to the parameters
        icvec(esspIX) = exp(logpjoint(esspindices));

        for doseID = 1:nICs
            icvec(dosemap_calib) = dosevals_calib(:, doseID);

            % simulate the model
            [~, simudata] = model_calib(paramvec, icvec, tvec(1:13));
            for msid = 1:nMS
                simulatedtraj(:,msid, doseID, kk, envid) = simudata(:, idMS_calib(msid));
            end
        end
    end
end

Compute the means and standard deviations

meanvals = mean(simulatedtraj, 4);
sdvals= std(simulatedtraj,0, 4);
maxvals = squeeze(max(max(max(meanvals+sdvals,[], 1), [], 3), [], 5)); % 1 by nMS array.
lineStyles = linspecer(nICs,'sequential');
hd = zeros(nICs, 1); % data trajectory handles
hm = zeros(nICs, 1); % model fit mean trajectory handles
hsd = zeros(nICs, 1); % model fit sd trajectory handles (patch objects)
hd = zeros(nICs, 1); % data trajectory handles
hm = zeros(nICs, 1); % model fit mean trajectory handles
hsd = zeros(nICs, 1); % model fit sd trajectory handles (patch objects)

for msid = 1:nMS
    figure
    ss = get(0, 'screensize');
    set(gcf, 'Position', [50 100 ss(3)/1.2 ss(4)/2.5]);

    for j= 1:nEnv
        subplot(1, nEnv,j);
        for i = 1:nICs
            linearidx = nEnv*(i-1)+j;
            % each index correcponds to a dose environment pair. (ie, each
            % line and patch have a common index)
            hd(i)=plot(tvec(1:13)/3600,1000*calibration_data(1:13,msid, i,j ),...
                'color',lineStyles(i, :) ,'linewidth',1.4);
            hold on
            [hm(i), hsd(i)] = boundedline(tvec(1:13)/3600,...
                meanvals(:, msid, i, 1, j), sdvals(:, msid, i, 1, j));
            set(hsd(i), 'FaceColor', lineStyles(i, :).^4, 'FaceAlpha', 0.1);
            set(hm(i), 'Color', lineStyles(i, :).^4, 'LineStyle', ':');
            hold on
            set(hm(i), 'LineWidth', 0.8)
        end
        set(gca, 'Ylim', [0, round(maxvals(msid))])
        title(sprintf('GFP, e%s', envname{j}), 'FontSize', 16)
        xlabel('time, hours')
        ylabel('GFP, nM')
    end
    legend(hd, {'DNA = 1nM', 'DNA = 2nM','DNA = 5nM','DNA = 10nM','DNA = 20nM'},...
        'Location', 'NorthWest')
end

The test circuit (Correction Step)

We start with a circuit description tetR_repression: tet repression model, single step, first 3 hours.

D_T + P - D_T:P -> D_T + P + T

D_G + P - D_G:P -> D_G + P + G

2 T - T2

D_G + T2 - D_G:T2

Next we visualize the Markov chains and prosterior distributions for the test circuit.

corr_SD = {    't015_corr1_20171023_151627_2_MBP'
    't015_corr1_20171023_151627_3_MBP'
    't015_corr1_20171023_151627_4_MBP'
    't015_corr1_20171023_151627_5_MBP'
    't015_corr1_20171023_151627_6_MBP'
    't015_corr1_20171023_151627_7_MBP'
    't015_corr1_20171023_151627_8_MBP'
    't015_corr1_20171023_151627_9_MBP'
    't015_corr1_20171023_151627_10_MBP'
    't015_corr1_20171023_151627_11_MBP'
    't015_corr1_20171023_151627_12_MBP'
    't015_corr1_20171023_151627_13_MBP'
    't015_corr1_20171023_151627_14_MBP'
    't015_corr1_20171023_151627_15_MBP'
    't015_corr1_20171023_151627_16_MBP'
    't015_corr1_20171023_151627_17_MBP'}

corr_SD2 = {    't015_corr1_20171023_151627_11_MBP'
    't015_corr1_20171023_151627_12_MBP'
    't015_corr1_20171023_151627_13_MBP'
    't015_corr1_20171023_151627_14_MBP'
    't015_corr1_20171023_151627_15_MBP'
    't015_corr1_20171023_151627_16_MBP'
    't015_corr1_20171023_151627_17_MBP'}
nW = 600;
legends = {'kfPT'    'krPT'  'kfPG'    'krPG'   'kfdim', 'krdim', 'kfrep', 'krrep'};

mcat = catMC(corr_SD);
corr_SD =

  16×1 cell array

    {'t015_corr1_20171023_151627_2_MBP' }
    {'t015_corr1_20171023_151627_3_MBP' }
    {'t015_corr1_20171023_151627_4_MBP' }
    {'t015_corr1_20171023_151627_5_MBP' }
    {'t015_corr1_20171023_151627_6_MBP' }
    {'t015_corr1_20171023_151627_7_MBP' }
    {'t015_corr1_20171023_151627_8_MBP' }
    {'t015_corr1_20171023_151627_9_MBP' }
    {'t015_corr1_20171023_151627_10_MBP'}
    {'t015_corr1_20171023_151627_11_MBP'}
    {'t015_corr1_20171023_151627_12_MBP'}
    {'t015_corr1_20171023_151627_13_MBP'}
    {'t015_corr1_20171023_151627_14_MBP'}
    {'t015_corr1_20171023_151627_15_MBP'}
    {'t015_corr1_20171023_151627_16_MBP'}
    {'t015_corr1_20171023_151627_17_MBP'}


corr_SD2 =

  7×1 cell array

    {'t015_corr1_20171023_151627_11_MBP'}
    {'t015_corr1_20171023_151627_12_MBP'}
    {'t015_corr1_20171023_151627_13_MBP'}
    {'t015_corr1_20171023_151627_14_MBP'}
    {'t015_corr1_20171023_151627_15_MBP'}
    {'t015_corr1_20171023_151627_16_MBP'}
    {'t015_corr1_20171023_151627_17_MBP'}

Plot the chains for all the 17 iterations, for 20 of the walkers (for easier visualization).

plotChains(mcat(:,1:30:end, :), nW, legends );

Plot the scatterplot for the last 7 iterations

mcat_converged= catMC(corr_SD2);
figure
 ecornerplot_vse(mcat_converged(:,:, 1:10:end),...
    'scatter', true,'transparency',0.25, 'color',[.6 .35 .3], ...
    'names', legends);

Correction Demo Figure

Next we create the correction demo figure. This figure is arranged into 3 columns and nICs number of rows. Each row corresponds to one dose (initial condition). Within each row, the subplot corresponding to the first column has the test circuit behavior in the two environments of interest, the candidate environment eSG and the reference environment (eVS). The second column has the same two trajectories, bu in addition has the model fit to the candidate environment data. The third column has the 'corrected' behavior, along with the two data trajectories.

envrefID = 1; % can be changed to 2 to generate the correction from 3 to 2.
envcandID = 3;

m_rearranged = mcat_converged(:,:)';
nptstotal = size(m_rearranged, 1);
npts = 500;
paramid = randperm(nptstotal, npts);
params_to_use_corr = m_rearranged(paramid, :);
envname = {'VS', 'MP', 'SG'};
load(  't015_corr1_20171023_151627_11_MBP', 'tvec', 'nW', 'model_corr',...
    'dosevals_corr','dosemap_corr', 'correction_data', 'pmap_corr',...
    'nSp_corr', 'idMS_corr' )

nMS = size(correction_data, 2); % nMS = 1 here, since only GFP is measured
nICs = size(dosevals_corr,2); % ICs, tetR DNA = [0 0.25 0.5 0.75 1 2 5 10] (nM)

nEnv_total = size(correction_data, 4);
% the total number of environments for which we have data.
nEnv_used = 2;
% the number of environments considered to demonstrate the correction procedure.
nEnv_estimated = 1;
% the number of environments on which the parameter estimation was performed.

espIX = pmap_corr{1};
esspIX = pmap_corr{2};
cspIX = pmap_corr{3};

nESP = length(espIX); % the ESP indices in the model (not in logpjoint)
nESSP = length(esspIX);% the Env specific species indices in the model (not in logpjoint)
nCSP = length(cspIX); % the CSP indices in the model (not in logpjoint)

icvec = zeros(nSp_corr, 1);

we will have 2 sets of simulated trajectories, one for the candidate env and one for the reference. Therefore, the number of environments used is nEnv_used (= 2).

simulatedtraj_corrstep1 = zeros(length(tvec(1:13)),nMS, nICs , npts);
simulatedtraj_corrstep2 = zeros(length(tvec(1:13)),nMS, nICs , npts);

simulate the correction model for all the randomly picked points from the posterior distribution, fixing the ESPs and ESSPs to the candidate environments values.

for kk = 1:npts
    logpjoint_corr1 = [-0.2821 1.3714 params_to_use_corr(kk, :)];
    cspindices = ((nESSP + nESP)*nEnv_estimated+1):length(logpjoint_corr1);
    paramvec = zeros(nESP+nCSP, 1);
    logpcsp = logpjoint_corr1(cspindices);
    paramvec(cspIX) = logpcsp;
    logpesp = logpjoint_corr1(1:nESP);
    paramvec(espIX) = logpesp;
    esspindices = (nESP + 1):(nESP+nESSP);

    % set the values of the initial condition vector to the parameters
    icvec(esspIX) = exp(logpjoint_corr1(esspindices));

    for doseID = 1:nICs
        icvec(dosemap_corr) = dosevals_corr(:, doseID);
        % simulate the model
        [~, simudata] = model_corr(paramvec, icvec, tvec(1:13));
        for msid = 1:nMS
            simulatedtraj_corrstep1(:,msid, doseID, kk) = simudata(:, idMS_corr(msid));
        end
    end
end

Compute the mean and standard deviations for correction step 1

meanvals_corrstep1 = mean(simulatedtraj_corrstep1, 4);
sdvals_corrstep1= std(simulatedtraj_corrstep1,0, 4);
maxvals_corrstep1 = squeeze(max(max(max(meanvals_corrstep1+sdvals_corrstep1,...
    [], 1), [], 3), [], 5)); % 1 by nMS array.

Also, simulate the trajectories in the reference environment. here we randomly mix and match points from the reference environments environment specific parameters and species, and the CSP from correction step 1.

refmedians = medians_converged_calib(((envrefID-1)*(nESSP + nESP)+1):(envrefID*(nESSP + nESP)));

% other option: params_to_use_calib(kk,...
% ((envrefID-1)*(nESSP + nESP)+1):(envrefID*(nESSP + nESP))), though this is a
% bit buggy right now
for kk = 1:npts
    logpjoint_corrstep2 = [refmedians  params_to_use_corr(kk, :)];
    cspindices = ((nESSP + nESP)+1):length(logpjoint_corrstep2);
    paramvec = zeros(nESP+nCSP, 1);
    logpcsp = logpjoint_corrstep2(cspindices);
    paramvec(cspIX) = logpcsp;
        logpesp = logpjoint_corrstep2(1:nESP);
        paramvec(espIX) = logpesp;
        esspindices = (nESP + 1):(nESP+nESSP);

        % set the values of the initial condition vector to the parameters
        icvec(esspIX) = exp(logpjoint_corrstep2(esspindices));

        for doseID = 1:nICs
            icvec(dosemap_corr) = dosevals_corr(:, doseID);

            % simulate the model
            [~, simudata] = model_corr(paramvec, icvec, tvec(1:13));
            for msid = 1:nMS
                simulatedtraj_corrstep2(:,msid, doseID, kk) =...
                    simudata(:, idMS_corr(msid));
            end
        end
end

Compute the mean and standard deviations for correction step 1

meanvals_corrstep2 = mean(simulatedtraj_corrstep2, 4);
sdvals_corrstep2= std(simulatedtraj_corrstep2,0, 4);
maxvals_corrstep2 = squeeze(max(max(max(meanvals_corrstep2+sdvals_corrstep2,...
    [], 1), [], 3), [], 5)); % 1 by nMS array.

compute the max of the axis jointly for corrstep 1 and 2.

maxvals_corr = max([maxvals_corrstep1; maxvals_corrstep2], [], 1);

Inialize arrays for handles to the graphics objects.

lineStyles = linspecer(2*nICs,'sequential');
hd_cand = zeros(nICs, 3); % data trajectory handles for candidate environment
hd_ref = zeros(nICs, 3); % data trajectory handles for reference environment
hm_cand = zeros(nICs, 1); % model fit mean trajectory handles
hsd_cand = zeros(nICs, 1); % model fit sd trajectory handles (patch objects)
hm_ref = zeros(nICs, 1); % model prediction mean trajectory handles
hsd_ref = zeros(nICs, 1); % model prediction sd trajectory handles (patch objects)

create the 3 column subplot

nICs = 4
for msid = 1:nMS
    maxvals_corr(msid) = 1500
    figure
    ss = get(0, 'screensize');
    set(gcf, 'Position', [50 100 ss(3)/1.2 ss(4)/1.2]);

% for each initial condition row
    for i = 1:nICs
        % column 1: just the experimental data
        linearidx = 3*(i-1)+1;
        subplot(nICs, 3,linearidx);

        hd_ref(i, 1)=plot(tvec(1:13)/3600,1000*correction_data(1:13,msid, i, envrefID),...
            'color',lineStyles(i, :) ,'linewidth',0.8);
        hold on
        hd_cand(i, 1)=plot(tvec(1:13)/3600,1000*correction_data(1:13,msid, i,envcandID ),...
            'color',lineStyles(nICs+i, :) ,'linewidth',0.8);
        hold on
        set(gca, 'Ylim', [0, round(maxvals_corr(msid))])
        set(gca, 'Xlim', [0, 1.6])
        title(sprintf('Experimental data, tetR DNA = %0.2g',...
            dosevals_corr(2, i)), 'FontSize', 12)
        xlabel('time, hours')
        ylabel('GFP, nM')
        legend([hd_ref(i, 1), hd_cand(i, 1)], ...
            {'Reference Extract', 'Candidate Extract'}, 'Location', 'NorthWest')

        % column 2: overlay correction step 1 fit (CSP estimation)
        linearidx = 3*(i-1)+2;
        subplot(nICs, 3,linearidx);
        hd_ref(i, 2)=plot(tvec(1:13)/3600,1000*correction_data(1:13,msid, i, envrefID),...
            'color',lineStyles(i, :) ,'linewidth',0.8);
        hold on
        hd_cand(i, 2)=plot(tvec(1:13)/3600,1000*correction_data(1:13,msid, i,envcandID ),...
            'color',lineStyles(nICs+i, :) ,'linewidth',0.8);
        hold on
        [hm_cand(i), hsd_cand(i)] = boundedline(tvec(1:13)/3600,...
            meanvals_corrstep1(:, msid, i, 1), sdvals_corrstep1(:, msid, i, 1));
        set(hsd_cand(i), 'FaceColor', lineStyles(nICs+i, :).^4, 'FaceAlpha', 0.1);
        set(hm_cand(i), 'Color', lineStyles(nICs+i, :).^4, 'LineStyle', ':');
        hold on
        set(hm_cand(i), 'LineWidth', 1)
        set(gca, 'Ylim', [0, round(maxvals_corr(msid))])
        set(gca, 'Xlim', [0, 1.6])
        title(sprintf('Correction Step 1, tetR DNA = %0.2g',...
            dosevals_corr(2, i)), 'FontSize', 12)
        xlabel('time, hours')
        legend([hd_ref(i, 2), hd_cand(i, 2), hm_cand(i)], ...
            {'Reference Extract', 'Candidate Extract', 'Model Fit (mean, sd)'},...
            'Location', 'NorthWest')
%         ylabel('GFP, nM')

        % column 3: overlay correction step 2 prediction istead of
        % correction step 1 fit. ("corrected behavior")
        linearidx = 3*(i-1)+3;
        subplot(nICs, 3,linearidx);
        hd_ref(i, 3)=plot(tvec(1:13)/3600,1000*correction_data(1:13,msid, i, envrefID),...
            'color',lineStyles(i, :) ,'linewidth',0.8);
        hold on
        hd_cand(i, 3)=plot(tvec(1:13)/3600,1000*correction_data(1:13,msid, i,envcandID ),...
            'color',lineStyles(nICs+i, :) ,'linewidth',0.8);
        hold on
        [hm_ref(i), hsd_ref(i)] = boundedline(tvec(1:13)/3600,...
            meanvals_corrstep2(:, msid, i, 1), sdvals_corrstep2(:, msid, i, 1));
        set(hsd_ref(i), 'FaceColor', lineStyles(i, :).^4, 'FaceAlpha', 0.1);
        set(hm_ref(i), 'Color', lineStyles(i, :).^4, 'LineStyle', ':');
        hold on
        set(hm_ref(i), 'LineWidth', 1)
        set(gca, 'Ylim', [0, round(maxvals_corr(msid))])
        set(gca, 'Xlim', [0, 1.6])
        title(sprintf('Correction Step 2, tetR DNA = %0.2g',...
            dosevals_corr(2, i)), 'FontSize', 12)
        xlabel('time, hours')
        legend([hd_ref(i, 3), hd_cand(i, 3), hm_ref(i)],...
            {'Reference Extract', 'Candidate Extract', ...
            '''Corrected'' Trajectories (mean, sd)'}, 'Location', 'NorthWest')
%         ylabel('GFP, nM')

      % Compute the \% correction
      normorig = norm(1000*correction_data(1:13,msid, i,...
          envrefID) - 1000*correction_data(1:13,msid, i,envcandID ))
      normreduced = norm(1000*correction_data(1:13,msid, i,...
          envrefID) - meanvals_corrstep2(:, msid, i, 1))

    end
end
nICs =

     4


maxvals_corr =

        1500


normorig =

   1.1003e+03


normreduced =

  578.7837


normorig =

  605.3382


normreduced =

  163.5637


normorig =

  196.3645


normreduced =

   92.7892


normorig =

  135.7825


normreduced =

  102.1523

compute the % correction

clear nrm normorig normreduced nrm2 %nrmi normorigi normreducedi  nrm2i
nICs = 4
for msid = 1:nMS
% for each initial condition row
    for i = 1:nICs
      % 2 norm
      normorig(i) = norm(1000*correction_data(1:13,msid, i,...
          envrefID) - 1000*correction_data(1:13,msid, i,envcandID ));
      normreduced(i) = norm(1000*correction_data(1:13,msid, i, envrefID)...
          - meanvals_corrstep2(:, msid, i, 1));
      nrm(i) = ((normorig(i))/(normreduced(i)));
%       nrm2(i) = ((normorig(i)-normreduced(i))/(normorig(i)));

%       % infty norm (want the max difference to come down by 2X
%       normorigi(i) = norm(1000*correction_data(1:13,msid, i, envrefID)...
% - 1000*correction_data(1:13,msid, i,envcandID ), Inf);
%       normreducedi(i) = norm(1000*correction_data(1:13,msid, i, envrefID)...
% - meanvals_corrstep2(:, msid, i, 1), Inf);
%       nrmi(i) = ((normorigi(i))/(normreducedi(i)));
%       nrm2i(i) = ((normorigi(i)-normreducedi(i))/(normorigi(i)));
%

    end
end
weightz1 = squeeze(max(1000*correction_data(1:13,1, 1:4, envrefID),...
    [],1)/sum(max(1000*correction_data(1:13,1, 1:4, envrefID),[], 1)))'
sum(normorig)/sum(normreduced)
sum(nrm.*weightz1)
% sum(nrm2.*weightz1)

% sum(normorigi)/sum(normreducedi)
% mean(nrmi)
% 1/mean(nrm2i)
nICs =

     4


weightz1 =

    0.4667    0.2267    0.1663    0.1404


ans =

    2.1741


ans =

    2.2645