Python:Simulating DNA Melting

From Course Wiki
Jump to: navigation, search

For now, you can copy and run the following Python script, and follow along with the explanatory writeup from [Matlab:Simulating DNA Melting|the Matlab version] while it runs. In addition to Python, you will need to have installed the packages numpy, scipy, and matplotlib.

# Simulating DNA Melting in Python
# 20.309
# Translated from the Matlab by KAD, 4 Apr 2011

import numpy
import scipy.io as sio
from math import e, sqrt
import matplotlib.pyplot as plt

def quickplot(x, y, title, xlabel, ylabel):
    """
    Wrapper function for plotting one simple thing using matplotlib.pyplot
    """
    plt.plot(x, y)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()

class Experiment:
    """
    Empty class definition -- works like a struct in C or Matlab.
    """
    pass

def DnaFraction(Ct, T, DeltaS, DeltaH, R=1.987):
    """
    Returns the fraction of dsDNA in a solution containing equal concentrations
    of two complementary ssDNA oligos, as a function of total [DNA],
    temperature, entropy of annealing, and enthalpy of annealing.

    Default units are mole, calorie, kelvin. For other unit systems, supply
    the appropriate value for the optional gas constant parameter.

    T can be a single value or a numpy.array of values.
    """
    # Compute Ct * Keq
    CtKeq = Ct * numpy.exp(DeltaS/R - DeltaH/(R*T))

    # Compute f
    f = (1 + CtKeq - numpy.sqrt(1 + 2*CtKeq)) / CtKeq

    return f

def middles(arr):
    """
    Returns a new numpy array, 1 element shorter than the input array,
    whose values come from averaging each two adjacent values in the input.
    """
    result = []
    for i in range(0, len(arr) - 1):
        element = (arr[i] + arr[i+1]) / 2.0
        result.append(element)
    return numpy.array(result)

def plotDifferentiatedCurve(expt, title="Inverted Derivative of Melting Curve"):
    """
    Takes an Experiment and an optional title for the plot.
    Plots the (inverted) derivative of the melting curve;
    Tm should be the highest point.
    """
    expt.dTemp = middles(expt.temp)
    expt.diffDsDnaFraction = numpy.diff(expt.dsDnaFraction)
    quickplot(expt.dTemp, -1 * expt.diffDsDnaFraction,
              title, "Temperature (K)", "Delta dsDNA Fraction")

def makeIdeal():
    """
    Returns an ideal/theoretical DNA melting experiment.
    """
    ideal = Experiment()
    ideal.temp = numpy.array(range(0, 100)) + 273.15
    ideal.deltaS = -184
    ideal.deltaH = -71E3
    ideal.concentration = 1E-6
    ideal.dsDnaFraction = DnaFraction(ideal.concentration, ideal.temp,
                                      ideal.deltaS, ideal.deltaH)
    return ideal


def makeSimulated():
    """
    Returns a simulated experimental run, from which we'll produce
    realistic noisy data.
    """
    sim = Experiment()
    sim.concentration = 1e-6
    sim.deltaS = -184
    sim.deltaH = -71e3
    sim.initTemp = 363
    sim.finalTemp = 293
    sim.sampleRate = 1
    sim.duration = 900

    # create time vector (seconds)
    sim.time = numpy.array(numpy.arange(0.0, sim.duration, 1.0/sim.sampleRate))

    return sim

def simCooling(sim, plot=True):
    """
    Accepts a simulated experiment and calculates its temperature over time,
    simulating exponential passive cooling.
    """
    coolingConst = 180.0 #tau = 180s = 3min
    
    # T(t) = (Tf-Ti)e^-(t/tau) + Tf
    sim.temp = ((sim.initTemp - sim.finalTemp) * numpy.exp(-sim.time / coolingConst)) + sim.finalTemp
    if plot:
        quickplot(sim.time, sim.temp, "Simulated Temperature vs. Time",
                  "Time (sec)", "Temperature (K)")

def simRTD(sim, addNoise=True, plot=True):
    """
    Simulates RTD voltage (proportional to temp) vs. time,
    either with or without noise.
    """
    #Fix it so you don't have to run simCooling first. try except
    
    # R_RTD = 1000 + 3.85(T-273) (ohms)
    sim.rRTD = 1000 + 3.85 * (sim.temp - 273)

    # RTD is in a voltage divider.
    # V across RTD = total voltage * (RTD / (other+RTD)).
    vPower = 15     #15V
    rOther = 20e3   #20kohms
    sim.vRTD = vPower * (sim.rRTD / (rOther + sim.rRTD))

    if addNoise:
        #gaussian noise with stdev=1mV
        noise = numpy.random.normal(scale=0.001, size=len(sim.vRTD))
        sim.vRTD += noise

    if plot:
        quickplot(sim.time, sim.vRTD, "Simulated RTD Voltage vs. Time",
                  "Time (sec)", "RTD Voltage (V)")
    
def simFluorescenceSignal(sim, gaussianNoise=True, shotNoise=True,
                          photobleaching=True, plotFluor=True,
                          plotSignal=True):
    """
    Calculates dsDNA fraction vs. time, scales to obtain fluorescence signal,
    and optionally adds noise and photobleaching.
    """
    title = "Fluorescence Signal Voltage vs. Time"
    
    sim.dsDnaFraction = DnaFraction(sim.concentration, sim.temp,
                                    sim.deltaS, sim.deltaH)
    
    #simulate photobleaching of signal
    if photobleaching:

        #Set bleaching const such that 1/2 of fluorphores fail during experiment
        BleachingConstant = 0.5**(1.0/900)

        #compute difference
        sim.diffDsDnaFraction = numpy.diff(sim.dsDnaFraction)

        #initialize empty array
        sim.dsDnaFractionBleached = numpy.empty(len(sim.dsDnaFraction))

        #set initial condition for difference eqn
        sim.dsDnaFractionBleached[0] = 0.05                

        #For loop to compute difference equation
        for i in range(1, len(sim.dsDnaFraction)):
            sim.dsDnaFractionBleached[i] = BleachingConstant * sim.dsDnaFractionBleached[i-1] + sim.diffDsDnaFraction[i-1]
        title += "\n(+ Photobleaching)"

        #Moving variable names around to make the other code happy
        sim.dsDnaFractionUnbleached = sim.dsDnaFraction
        sim.dsDnaFraction = sim.dsDnaFractionBleached
        
    if plotFluor:
        quickplot(sim.time, sim.dsDnaFraction,
                  "Relative Fluorescence vs. Time",
                  "Time (sec)", "Relative Fluorescence (AU)")
            
    #Scale curve to simulate voltage output of photodiode / amplifier.
    #Amplifier range between 5 and 9 volts output
    ampRange = numpy.random.uniform(low=5.0, high=9.0)

    #Minimum between +/- 1
    ampMin = numpy.random.uniform(low=-1.0, high=1.0)

    #Scale output
    sim.fluorSignal = ampRange * sim.dsDnaFraction + ampMin 

    #gaussian random noise
    if gaussianNoise:
        #gaussian noise with stdev=10mV
        noise = numpy.random.normal(scale=0.01, size=len(sim.fluorSignal)) 
        sim.fluorSignal += noise
        title += " (+ Gaussian Noise)"

    #shot (impulse) noise
    if shotNoise:
        # Technically this should be poisson-distributed, but generating
        # poisson noise is actually not trivial. An acceptable substitute is
        # to make every sample have some probability P(spike) of being a spike
        # and probability 1-P(spike) of being the actual fluorescence sample.
        # (See the book "Numerical recipes in $language".)

        #Uniformly distributed random numbers
        noise = numpy.random.rand(len(sim.fluorSignal))

        #Probability of spike on any given sample
        spikeProbability = 1.0/75

        #Turn list of probabilities into list of zeros and 10V spikes
        noise = map(lambda i: 10 if i<spikeProbability else 0, noise)

        #Add in spikes
        sim.fluorSignal = [max(s, n) for s, n in zip(sim.fluorSignal, noise)] 
        title += " (+ Shot Noise)"

    if plotSignal:
        quickplot(sim.time, sim.fluorSignal, title,
                  "Time (sec)", "Fluorescence Signal Voltage (V)")

def formatDataForMatlab(rtd, fluor, filename):
    """
    Outputs a .mat file in the exact style of the data you get from
    running the LabView VM, i.e. an Nx2 matrix with RTD voltages in
    the first column and fluorescence signal voltages in the second.
    Inputs: two numpy arrays.
    """
    #Turn 1D numpy arrays into columns in a single matrix
    simData = numpy.column_stack((rtd, fluor))

    #Write matlab file
    sio.savemat(filename, {"simData": simData}) 


# If you run this script, plots will appear one after the other,
# following the sequence of the write-up.
# If you encounter a bug with the script drawing multiple plots in one run,
# comment out lines and/or change "plot=True" to "plot=False" as desired.
ideal = makeIdeal()

#comment out either of these 2 lines as desired
quickplot(ideal.temp, ideal.dsDnaFraction, "Ideal Melting Curve",
          "Temperature (K)", "dsDNA Fraction")
plotDifferentiatedCurve(ideal)

#change plot to = False as desired
sim = makeSimulated()
simCooling(sim, plot=True)
simRTD(sim, plot=True)
simFluorescenceSignal(sim, plotFluor=True, plotSignal=True)

#write out matlab file of sim data
formatDataForMatlab(sim.vRTD, sim.fluorSignal, "simData.mat")

To Do

  • Make the "ideal" part work more like the "sim" part, with the plot=True or False parameter
  • Link to numpy, scipy, matplotlib
  • Test on 309 lab computers and Athena
  • This code was written before TECs were included in the lab, which is why it includes only passive cooling and not forced heating. Add forced heating. If I finish making the heating control circuit work with the H-bridge, also add forced cooling.