Source code for stickydesign.energetics_daoe

from __future__ import division
import numpy as np
from .endclasses import pairseqa, tops, endarray, Energetics
from .version import __version__

from . import newparams as p


[docs]class EnergeticsDAOE(Energetics):
    """Energy functions based on several sources, primarily SantaLucia's
    2004 paper, along with handling of dangles, tails, and nicks
    specifically for DX tile sticky ends.

    
    Parameters
    ----------
    coaxparams : str or False, optional
        choose the coaxial stacking parameters to use.  False is no coaxial
        stacking adjustment, other options are 'protozanova', 'peyret',
        and 'pyshni'.

    singlepair : bool, optional
        Treat single base pair pairing as possible (defaults
        to False).

    temperature : float, optional
        Temperature to use for the model, in degress Celsius (defaults to 37).

    """

    def __init__(self,
                 temperature=37,
                 mismatchtype=None,
                 coaxparams=False,
                 singlepair=False,
                 danglecorr=True,
                 version=None,
                 enclass=None):
        self.coaxparams = coaxparams
        self.singlepair = singlepair
        self.danglecorr = danglecorr
        self.temperature = temperature

    @property
    def info(self):
        info = {'enclass': 'EnergeticsDAOE',
                'temperature': self.temperature,
                'coaxparams': self.coaxparaminfo,
                'danglecorr': self.danglecorr,
                'singlepair': self.singlepair,
                'version': __version__}
        return info

    @property
    def temperature(self):
        return self._temperature

    @temperature.setter
    def temperature(self, val):
        self._temperature = val
        self._setup_params(val)
    
    def __str__(self):
        return "EnergeticsDAOE(" + \
            ", ".join("{}={}".format(x, repr(y))
                      for x, y in self.info.items()) + \
            ")"

    def __repr__(self):
        return self.__str__()
    
    def _setup_params(self, temperature=37):
        self.initdG = p.initdG37 - (temperature - 37) * p.initdS
        self.nndG = p.nndG37 - (temperature - 37) * p.nndS
        if self.coaxparams == 'protozanova' or self.coaxparams is True:
            self.coaxddG = p.coaxddG37 - (temperature - 37) * p.coaxddS
            self.coaxparaminfo = 'protozanova'
            self.coaxparams = True
        elif self.coaxparams == 'peyret':
            self.coaxddG = p.coax_peyret_ddG37 - (
                temperature - 37) * p.coax_peyret_ddS
            self.coaxparaminfo = self.coaxparams
            self.coaxparams = True
        elif self.coaxparams == 'pyshni':
            self.coaxddG = p.coax_pyshni_ddG37 - (
                temperature - 37) * p.coax_pyshni_ddS
            self.coaxparaminfo = self.coaxparams
            self.coaxparams = True
        elif not self.coaxparams:
            self.coaxddG = np.zeros_like(p.coaxddG37)
            self.coaxparaminfo = self.coaxparams
            self.coaxparams = False
        else:
            raise ValueError("Invalid coaxparams: {}".format(self.coaxparams),
                             self.coaxparams)
        self.dangle5dG = p.dangle5dG37 - (temperature - 37) * p.dangle5dS
        self.dangle3dG = p.dangle3dG37 - (temperature - 37) * p.dangle3dS
        self.intmmdG = p.intmmdG37 - (temperature - 37) * p.intmmdS

        self.ltmmdG_5335 = np.zeros(256)
        self.rtmmdG_5335 = np.zeros(256)
        self.intmmdG_5335 = np.zeros(256)

        # Dumb setup. FIXME: do this cleverly
        for i in range(0, 4):
            for j in range(0, 4):
                for k in range(0, 4):
                    self.ltmmdG_5335[
                        i * 64 + j * 16 + k * 4 +
                        j] = self.dangle5dG[i * 4
                                            + j] + self.dangle3dG[(3 - j) * 4
                                                                  + (3 - k)]
                    self.rtmmdG_5335[
                        i * 64 + j * 16 + i * 4 +
                        k] = self.dangle3dG[i * 4
                                            + j] + self.dangle5dG[(3 - k) * 4
                                                                  + (3 - i)]
                    self.intmmdG_5335[i * 64 + j * 16 + k * 4 +
                                      j] = self.intmmdG[(3 - j) * 16
                                                        + (3 - k) * 4 + i]
                    self.intmmdG_5335[i * 64 + j * 16 + i * 4 +
                                      k] = self.intmmdG[i * 16
                                                        + j * 4 + (3 - k)]

[docs]    def matching_uniform(self, seqs):
        ps = pairseqa(seqs)

        # In both cases here, the energy we want is the NN binding energy of
        # each stack,
        if seqs.endtype == 'DT':
            dcorr = np.zeros_like(self.dangle3dG[ps[:, 0]])
            if self.danglecorr:
                dcorr += -self.dangle3dG[ps[:, 0]] - self.dangle3dG[ps.revcomp(
                )[:, 0]]
            if self.coaxparams:
                dcorr += self.coaxddG[ps[:, 0]] + self.coaxddG[ps.revcomp()[:,
                                                                            0]]
        elif seqs.endtype == 'TD':
            dcorr = np.zeros_like(self.dangle5dG[ps[:, -1]])
            if self.danglecorr:
                dcorr += (-self.dangle5dG[ps[:, -1]] -
                          self.dangle5dG[ps.revcomp()[:, -1]])
            if self.coaxparams:
                dcorr += self.coaxddG[ps[:, -1]] + self.coaxddG[ps.revcomp()
                                                                [:, -1]]
        else:
            raise NotImplementedError
        return -(np.sum(self.nndG[ps], axis=1) + self.initdG + dcorr)

[docs]    def uniform(self, seqs1, seqs2, debug=False):
        if seqs1.shape != seqs2.shape:
            if seqs1.ndim == 1:
                seqs1 = endarray(
                    np.repeat(np.array([seqs1]), seqs2.shape[0], 0),
                    seqs1.endtype)
            else:
                raise ValueError(
                    "Lengths of sequence arrays are not acceptable.")

        assert seqs1.endtype == seqs2.endtype
        endtype = seqs1.endtype

        s1 = tops(seqs1)
        s2 = tops(seqs2)
        l = s1.shape[1]

        # s2r is revcomp pairseq of s2.
        s2r = np.fliplr(np.invert(s2) % 16)
        s2r = s2r // 4 + 4 * (s2r % 4)

        alloffset_max = np.zeros(
            s1.shape[0])  # store for max binding at any offset

        if endtype == 'TD':
            s1_end = s1[:, 0:-1]  #
            s2_end_rc = s2r[:, 1:]
            s1l = np.hstack(((4 * (s2r[:, 0] // 4) + s1[:, 0] // 4).reshape(
                -1, 1), s1))
            s2rl = np.hstack(
                (s2r, (4 * (s2r[:, -1] % 4) + s1[:, -1] % 4).reshape(-1, 1)))
        elif endtype == 'DT':
            s1_end = s1[:, 1:]
            s2_end_rc = s2r[:, 0:-1]
            s2rl = np.hstack(((4 * (s1[:, 0] // 4) + s2r[:, 0] // 4).reshape(
                -1, 1), s2r))
            s1l = np.hstack(
                (s1, (4 * (s1[:, -1] % 4) + s2r[:, -1] % 4).reshape(-1, 1)))
        else:
            raise NotImplementedError

        for offset in range(-l + 2, l - 1):
            if offset > 0:
                if endtype == 'TD':
                    # Energies of matching stacks, zero otherwise. Can be used
                    # to check match.
                    ens = (s1_end[:, :-offset] == s2_end_rc[:, offset:]) * (
                        -self.nndG[s1_end[:, :-offset]])
                    ens[:, 0] += (
                        ens[:, 0] != 0) * (-self.dangle3dG[s1_end[:, -offset]]
                                           )  # - for positive sign
                    ens[:, -1] += (
                        ens[:, -1] != 0) * (-self.dangle3dG[s2[:, -offset - 1]]
                                            )  # - for positive sign
                    ltmm = -self.ltmmdG_5335[s1_end[:, :-offset] * 16
                                             + s2_end_rc[:, offset:]]
                    rtmm = -self.rtmmdG_5335[s1_end[:, :-offset] * 16
                                             + s2_end_rc[:, offset:]]
                    intmm = -self.intmmdG_5335[s1_end[:, :-offset] * 16
                                               + s2_end_rc[:, offset:]]
                if endtype == 'DT':
                    ens = (s1_end[:, offset:] == s2_end_rc[:, :-offset]) * (
                        -self.nndG[s1_end[:, offset:]])
                    ens[:, 0] += (ens[:, 0] != 0) * (
                        -self.dangle5dG[s1_end[:, offset - 1]]
                    )  # - for positive sign
                    ens[:, -1] += (ens[:, -1] != 0) * (
                        -self.dangle5dG[s2[:, offset]])  # - for positive sign
                    ltmm = -self.ltmmdG_5335[s1_end[:, offset:] * 16
                                             + s2_end_rc[:, :-offset]]
                    rtmm = -self.rtmmdG_5335[s1_end[:, offset:] * 16
                                             + s2_end_rc[:, :-offset]]
                    intmm = -self.intmmdG_5335[s1_end[:, offset:] * 16
                                               + s2_end_rc[:, :-offset]]
            elif offset == 0:
                ens = (s1_end == s2_end_rc) * (-self.nndG[s1_end])
                if endtype == 'DT':
                    ens[:, 0] += (ens[:, 0] != 0) * (
                        +self.dangle3dG[s1[:, 0]] -
                        self.coaxparams * self.coaxddG[s1[:, 0]]) - (
                            s1l[:, 0] == s2rl[:, 0]
                        ) * self.nndG[s1l[:, 0]]  # sign reversed
                    ens[:, -1] += (ens[:, -1] != 0) * (
                        +self.dangle3dG[s2[:, 0]] -
                        self.coaxparams * self.coaxddG[s2[:, 0]]) - (
                            s1l[:, -1] == s2rl[:, -1]
                        ) * self.nndG[s1l[:, -1]]  # sign reversed
                if endtype == 'TD':
                    ens[:, 0] += +(ens[:, 0] != 0) * (
                        self.dangle5dG[s1[:, -1]] -
                        self.coaxparams * self.coaxddG[s1[:, -1]]) - (
                            s1l[:, 0] == s2rl[:, 0]
                        ) * self.nndG[s1l[:, 0]]  # sign reversed
                    ens[:, -1] += +(ens[:, -1] != 0) * (
                        self.dangle5dG[s2[:, -1]] -
                        self.coaxparams * self.coaxddG[s2[:, -1]]) - (
                            s1l[:, -1] == s2rl[:, -1]
                        ) * self.nndG[s1l[:, -1]]  # sign reversed
                ltmm = np.zeros_like(ens)
                rtmm = np.zeros_like(ens)
                intmm = np.zeros_like(ens)
                ltmm = -self.ltmmdG_5335[s1_end[:, :] * 16 + s2_end_rc[:, :]]
                rtmm = -self.rtmmdG_5335[s1_end[:, :] * 16 + s2_end_rc[:, :]]
                intmm = -self.intmmdG_5335[s1_end[:, :] * 16 + s2_end_rc[:, :]]
            else:  # offset < 0
                if endtype == 'TD':
                    ens = (s1_end[:, -offset:] == s2_end_rc[:, :offset]) * (
                        -self.nndG[s1_end[:, -offset:]])
                    ens[:, 0] += (ens[:, 0] != 0) * (
                        -self.nndG[s1[:, -1]] - p.tailcordG37 +
                        self.dangle5dG[s1[:, -1]])  # - for positive sign
                    ens[:, -1] += (ens[:, -1] != 0) * (
                        -self.nndG[s2[:, -1]] - p.tailcordG37 +
                        self.dangle5dG[s2[:, -1]])  # - for positive sign
                    ltmm = -self.ltmmdG_5335[s1_end[:, -offset:] * 16
                                             + s2_end_rc[:, :offset]]
                    rtmm = -self.rtmmdG_5335[s1_end[:, -offset:] * 16
                                             + s2_end_rc[:, :offset]]
                    intmm = -self.intmmdG_5335[s1_end[:, -offset:] * 16
                                               + s2_end_rc[:, :offset]]
                elif endtype == 'DT':
                    ens = (s1_end[:, :offset] == s2_end_rc[:, -offset:]) * (
                        -self.nndG[s1_end[:, :offset]])
                    ens[:, 0] += (ens[:, 0] != 0) * (
                        -self.nndG[s1[:, 0]] - p.tailcordG37 +
                        self.dangle3dG[s1[:, 0]])  # - for positive sign
                    ens[:, -1] += (ens[:, -1] != 0) * (
                        -self.nndG[s2[:, 0]] - p.tailcordG37 +
                        self.dangle3dG[s2[:, 0]])  # - for positive sign
                    ltmm = -self.ltmmdG_5335[s1_end[:, :offset] * 16
                                             + s2_end_rc[:, -offset:]]
                    rtmm = -self.rtmmdG_5335[s1_end[:, :offset] * 16
                                             + s2_end_rc[:, -offset:]]
                    intmm = -self.intmmdG_5335[s1_end[:, :offset] * 16
                                               + s2_end_rc[:, -offset:]]
            bindmax = np.zeros(ens.shape[0])
            if debug:
                print(offset, ens.view(np.ndarray), ltmm, rtmm, intmm)
            for e in range(0, ens.shape[0]):
                acc = 0
                for i in range(0, ens.shape[1]):
                    if ens[e, i] != 0:
                        # we're matching. add the pair to the accumulator
                        acc += ens[e, i]
                    elif rtmm[e, i] != 0 and i > 0 and ens[e, i-1] > 0:
                        # we're mismatching on the right: see if right-dangling
                        # is highest binding so far, and continue, adding intmm
                        # to accumulator.
                        if acc + rtmm[e, i] > bindmax[e]:
                            bindmax[e] = acc + rtmm[e, i]
                        acc += intmm[e, i]
                    elif ltmm[e, i] != 0 and i < ens.shape[1] - 1:
                        # don't do this for the last pair we're mismatching on
                        # the left: see if our ltmm is stronger than our
                        # accumulated binding+intmm. If so, reset to ltmm and
                        # continue as left-dangling, or reset to 0 if ltmm+next
                        # is weaker than next dangle,or next is also a mismatch
                        # (fixme: good idea?). If not, continue as internal
                        # mismatch.
                        if (not self.singlepair) and (ltmm[e, i] >
                                                      acc + intmm[e, i]) and (
                                                          ens[e, i + 1] > 0):
                            acc = ltmm[e, i]
                        elif (self.singlepair) and (ltmm[e, i] >
                                                    acc + intmm[e, i]):
                            acc = ltmm[e, i]
                        else:
                            acc += intmm[e, i]
                    else:  # we're at a loop. Add stuff.
                        acc -= p.looppenalty
                bindmax[e] = max(bindmax[e], acc)
            alloffset_max = np.maximum(alloffset_max, bindmax)
        return alloffset_max - self.initdG