Python:Simulating DNA Melting

From Course Wiki
Jump to: navigation, search

Hi! I work, even though there's no writeup!

For now, you can copy and run the Python script, and follow along with the explanatory writeup from the Matlab version while it runs. In addition to Python, you will need to have installed the packages numpy, scipy, and matplotlib. (These packages are already installed on the lab computers.)

To Do

  • Test on 309 lab computers and Athena.
  • Works on 309 lab computers. Has the same issue of plots appearing sequentially and the script not proceeding until the plot window is dismissed. Should I write a version that waits until the end and plots everything all at once?

Code v1.3

This code models forced heating followed by either forced or passive cooling. It differs slightly from the Matlab writeup, which was back when forced heating and cooling didn't exist and there was only passive cooling.

Also, now it optionally plots the melting curve (in addition to fluorescence signal vs. time) at the end.

# Simulating DNA Melting in Python
# 20.309
# Translated from the Matlab by KAD, 4 Apr 2011
# v1.0 works the same as the Matlab
# v1.1 including forced heating and option to plot melting curve at end
# v1.2 including option for forced cooling instead of passive cooling
# v1.3 changed format of output to match actual LabView VI output
# (was wrong before)

import numpy
import scipy.io as sio
import matplotlib.pyplot as plt

def quickplot(xvector, yvector, title, xlabel, ylabel):
    """
    Wrapper function for making a simple plot using matplotlib.pyplot
    """
    plt.plot(xvector, yvector)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.show()

class Experiment:
    """
    Empty class definition -- works like a struct in C or Matlab.
    """
    pass

def DnaFraction(Ct, T, DeltaS, DeltaH, R=1.987):
    """
    Returns the fraction of dsDNA in a solution containing equal concentrations
    of two complementary ssDNA oligos, as a function of total [DNA],
    temperature, entropy of annealing, and enthalpy of annealing.

    Default units are mole, calorie, kelvin. For other unit systems, supply
    the appropriate value for the optional gas constant parameter.

    T can be a single value or a numpy.array of values.
    """
    # Compute Ct * Keq
    CtKeq = Ct * numpy.exp(DeltaS/R - DeltaH/(R*T))

    # Compute f
    f = (1 + CtKeq - numpy.sqrt(1 + 2*CtKeq)) / CtKeq

    return f

def middles(arr):
    """
    Returns a new numpy array, 1 element shorter than the input array,
    whose values come from averaging each two adjacent values in the input.
    """
    result = []
    for i in range(0, len(arr) - 1):
        element = (arr[i] + arr[i+1]) / 2.0
        result.append(element)
    return numpy.array(result)

def diffedCurve(expt, plot=True):
    """
    Takes an Experiment and calculates a differentiated melting curve.
    Optionally plots it.
    """
    expt.dTemp = middles(expt.temp)
    expt.diffDsDnaFraction = numpy.diff(expt.dsDnaFraction)
    if plot:
        quickplot(expt.dTemp, -1 * expt.diffDsDnaFraction,
                  "Inverted Derivative of Melting Curve",
                  "Temperature (K)", "Delta dsDNA Fraction")

def makeIdeal(plot=True):
    """
    Returns an ideal/theoretical DNA melting experiment.
    Optionally plots it.
    """
    ideal = Experiment()
    ideal.temp = numpy.array(range(0, 100)) + 273.15
    ideal.deltaS = -184
    ideal.deltaH = -71E3
    ideal.concentration = 1E-6
    ideal.dsDnaFraction = DnaFraction(ideal.concentration, ideal.temp,
                                      ideal.deltaS, ideal.deltaH)
    if plot:
        quickplot(ideal.temp, ideal.dsDnaFraction,
                  "Ideal DNA Melting Curve",
                  "Temperature (K)", "dsDNA Fraction")
    return ideal


def makeSimulated(forcedCooling=False):
    """
    Returns a simulated experimental run, from which we'll produce
    realistic noisy data.
    """
    sim = Experiment()
    sim.concentration = 1e-6
    sim.deltaS = -184
    sim.deltaH = -71e3
    sim.hotTemp = 363.0 #90C
    sim.coolTemp = 293.0 #20C
    sim.sampleRate = 1.0 #samples per second

    sim.forcedCooling = forcedCooling
    
    sim.heatingRate = 0.5 #degrees per second
    sim.heatingDuration = (sim.hotTemp - sim.coolTemp) / sim.heatingRate

    if sim.forcedCooling:
        sim.coolingRate = -0.5 #degrees per second
        sim.coolingDuration = (sim.coolTemp - sim.hotTemp) / sim.coolingRate
    else:
        sim.coolingDuration = 900.0 #15 min to cool (would really take a bit longer)

    # create time vectors (seconds)
    sim.heatingTime = numpy.array(numpy.arange(0.0,
                                               sim.heatingDuration,
                                               1/sim.sampleRate))
    sim.coolingTime = numpy.array(numpy.arange(0.0,
                                               sim.coolingDuration,
                                               1/sim.sampleRate))
    sim.time = numpy.array(numpy.arange(0.0,
                                        sim.heatingDuration + sim.coolingDuration,
                                        1/sim.sampleRate))

    return sim

def simTemp(sim, plot=True):
    """
    Accepts a simulated experiment and calculates its temperature over time,
    simulating forced heating and forced passive cooling
    """
    # Heating!
    # T(t) = rate * time
    heating = sim.heatingRate * sim.heatingTime + sim.coolTemp

    # Forced cooling!
    # T(t) = rate * time
    if sim.forcedCooling:
        cooling = sim.coolingRate * sim.coolingTime + sim.hotTemp

    # Passive cooling!
    # T(t) = (Tf-Ti)e^-(t/tau) + Tf
    else:
        coolingConst = 180.0 #tau = 180s = 3min
        cooling = ((sim.hotTemp - sim.coolTemp) * numpy.exp(-sim.coolingTime / coolingConst)) + sim.coolTemp

    # Combine heating & cooling
    sim.temp = numpy.concatenate((heating, cooling))
    
    if plot:
        quickplot(sim.time, sim.temp, "Simulated Temperature vs. Time",
                  "Time (sec)", "Temperature (K)")

def simRTD(sim, addNoise=True, plot=True):
    """
    Simulates RTD voltage (proportional to temp) vs. time,
    either with or without noise.
    """

    try:
        # R_RTD = 1000 + 3.85(T-273) (ohms)
        sim.rRTD = 1000 + 3.85 * (sim.temp - 273)
    except AttributeError:
        simTemp(sim, plot=False)
        # R_RTD = 1000 + 3.85(T-273) (ohms)
        sim.rRTD = 1000 + 3.85 * (sim.temp - 273)

    # RTD is in a voltage divider.
    # V across RTD = total voltage * (RTD / (other+RTD)).
    vPower = 15     #15V
    rOther = 20e3   #20kohms
    sim.vRTD = vPower * (sim.rRTD / (rOther + sim.rRTD))

    if addNoise:
        #gaussian noise with stdev=1mV
        noise = numpy.random.normal(scale=0.001, size=len(sim.vRTD))
        sim.vRTD += noise

    if plot:
        quickplot(sim.time, sim.vRTD, "Simulated RTD Voltage vs. Time",
                  "Time (sec)", "RTD Voltage (V)")
    
def simFluorescenceSignal(sim, gaussianNoise=True, shotNoise=True,
                          photobleaching=True, plotFluor=True,
                          plotSignal=True):
    """
    Calculates dsDNA fraction vs. time, scales to obtain fluorescence signal,
    and optionally adds noise and photobleaching.
    """
    title = "Fluorescence Signal Voltage vs. Time"
    
    sim.dsDnaFraction = DnaFraction(sim.concentration, sim.temp,
                                    sim.deltaS, sim.deltaH)

    # OK something is wrong with how this is implemented because when you
    # combine heating and photobleaching shit gets weirdly negative
    #simulate photobleaching of signal
    if photobleaching:

        #Set bleaching const such that 1/2 of fluorphores fail during experiment
        BleachingConstant = 0.5**(1.0/900)

        #compute difference
        sim.diffDsDnaFraction = numpy.diff(sim.dsDnaFraction)

        #initialize empty array
        sim.dsDnaFractionBleached = numpy.empty(len(sim.dsDnaFraction))

        #set initial condition for difference eqn
        sim.dsDnaFractionBleached[0] = 0.995                

        #For loop to compute difference equation
        for i in range(1, len(sim.dsDnaFraction)):
            sim.dsDnaFractionBleached[i] = BleachingConstant * sim.dsDnaFractionBleached[i-1] + sim.diffDsDnaFraction[i-1]
        title += "\n(+ Photobleaching)"

        #Moving variable names around to make the other code happy
        sim.dsDnaFractionUnbleached = sim.dsDnaFraction
        sim.dsDnaFraction = sim.dsDnaFractionBleached

    if plotFluor:
        quickplot(sim.time, sim.dsDnaFraction,
                  "Relative Fluorescence vs. Time",
                  "Time (sec)", "Relative Fluorescence (AU)")
            
    #Scale curve to simulate voltage output of photodiode / amplifier.
    #Amplifier range between 5 and 9 volts output
    ampRange = numpy.random.uniform(low=5.0, high=9.0)

    #Minimum between +/- 1
    ampMin = numpy.random.uniform(low=-1.0, high=1.0)

    #Scale output
    sim.fluorSignal = ampRange * sim.dsDnaFraction + ampMin 

    #gaussian random noise
    if gaussianNoise:
        #gaussian noise with stdev=10mV
        noise = numpy.random.normal(scale=0.01, size=len(sim.fluorSignal)) 
        sim.fluorSignal += noise
        title += " (+ Gaussian Noise)"

    #shot (impulse) noise
    if shotNoise:
        # Technically this should be poisson-distributed, but generating
        # poisson noise is actually not trivial. An acceptable substitute is
        # to make every sample have some probability P(spike) of being a spike
        # and probability 1-P(spike) of being the actual fluorescence sample.
        # (See the book "Numerical recipes in $language".)

        #Uniformly distributed random numbers
        noise = numpy.random.rand(len(sim.fluorSignal))

        #Probability of spike on any given sample
        spikeProbability = 1.0/75

        #Turn list of probabilities into list of zeros and 10V spikes
        noise = map(lambda i: 10 if i<spikeProbability else 0, noise)

        #Add in spikes
        sim.fluorSignal = [max(s, n) for s, n in zip(sim.fluorSignal, noise)] 
        title += " (+ Shot Noise)"

    if plotSignal:
        quickplot(sim.time, sim.fluorSignal, title,
                  "Time (sec)", "Fluorescence Signal Voltage (V)")

def writeOut(expt, filename, plot=True):
    """
    Writes out simulated data in the same format as the LabView VI does.
    I.e. each line is: RTD voltage, tab, fluorescence voltage, tab, some
    number whose significance I don't understand. (I'm including that
    mysterious number here, even though it doesn't matter for Matlab's file
    I/O, because Python's file I/O cares how many fields there are in a line,
    the way I've written it.
    """
    rtd = expt.vRTD
    fluor = expt.fluorSignal

    f = open(filename, 'w')

    for (i, j) in zip(rtd, fluor):
        f.write("%f\t%f\t%f\n" % (i, j, 1))
    f.close()
    if plot:
        quickplot(rtd, fluor, "Melting Curve of Simulated Data", "RTD Voltage", "Fluorescence Signal")
    

# If you run this script, plots will appear one after the other,
# following the sequence of the write-up.
# If you encounter a bug with the script drawing multiple plots in one run
# (may happen on Mac OSX), change plot=True to plot=False as desired to plot
# only one thing at a time.
# Change other parameters as desired to add or remove noise.

# ideal curve
ideal = makeIdeal(plot=True)
diffedCurve(ideal, plot=True)

# simulated realistic data
sim = makeSimulated(forcedCooling=True)
simTemp(sim, plot=True)
simRTD(sim, addNoise=True, plot=True)
simFluorescenceSignal(sim, gaussianNoise=True, shotNoise=True,
                      photobleaching=True, plotFluor=True, plotSignal=True)

#write out file of sim data
writeOut(sim, 'simdata.txt', plot=True)