Source code for stickydesign.energetics_daoe
from __future__ import division
import numpy as np
from .endclasses import pairseqa, tops, endarray, Energetics
from .version import __version__
from . import newparams as p
[docs]class EnergeticsDAOE(Energetics):
"""Energy functions based on several sources, primarily SantaLucia's
2004 paper, along with handling of dangles, tails, and nicks
specifically for DX tile sticky ends.
Parameters
----------
coaxparams : str or False, optional
choose the coaxial stacking parameters to use. False is no coaxial
stacking adjustment, other options are 'protozanova', 'peyret',
and 'pyshni'.
singlepair : bool, optional
Treat single base pair pairing as possible (defaults
to False).
temperature : float, optional
Temperature to use for the model, in degress Celsius (defaults to 37).
"""
def __init__(self,
temperature=37,
mismatchtype=None,
coaxparams=False,
singlepair=False,
danglecorr=True,
version=None,
enclass=None):
self.coaxparams = coaxparams
self.singlepair = singlepair
self.danglecorr = danglecorr
self.temperature = temperature
@property
def info(self):
info = {'enclass': 'EnergeticsDAOE',
'temperature': self.temperature,
'coaxparams': self.coaxparaminfo,
'danglecorr': self.danglecorr,
'singlepair': self.singlepair,
'version': __version__}
return info
@property
def temperature(self):
return self._temperature
@temperature.setter
def temperature(self, val):
self._temperature = val
self._setup_params(val)
def __str__(self):
return "EnergeticsDAOE(" + \
", ".join("{}={}".format(x, repr(y))
for x, y in self.info.items()) + \
")"
def __repr__(self):
return self.__str__()
def _setup_params(self, temperature=37):
self.initdG = p.initdG37 - (temperature - 37) * p.initdS
self.nndG = p.nndG37 - (temperature - 37) * p.nndS
if self.coaxparams == 'protozanova' or self.coaxparams is True:
self.coaxddG = p.coaxddG37 - (temperature - 37) * p.coaxddS
self.coaxparaminfo = 'protozanova'
self.coaxparams = True
elif self.coaxparams == 'peyret':
self.coaxddG = p.coax_peyret_ddG37 - (
temperature - 37) * p.coax_peyret_ddS
self.coaxparaminfo = self.coaxparams
self.coaxparams = True
elif self.coaxparams == 'pyshni':
self.coaxddG = p.coax_pyshni_ddG37 - (
temperature - 37) * p.coax_pyshni_ddS
self.coaxparaminfo = self.coaxparams
self.coaxparams = True
elif not self.coaxparams:
self.coaxddG = np.zeros_like(p.coaxddG37)
self.coaxparaminfo = self.coaxparams
self.coaxparams = False
else:
raise ValueError("Invalid coaxparams: {}".format(self.coaxparams),
self.coaxparams)
self.dangle5dG = p.dangle5dG37 - (temperature - 37) * p.dangle5dS
self.dangle3dG = p.dangle3dG37 - (temperature - 37) * p.dangle3dS
self.intmmdG = p.intmmdG37 - (temperature - 37) * p.intmmdS
self.ltmmdG_5335 = np.zeros(256)
self.rtmmdG_5335 = np.zeros(256)
self.intmmdG_5335 = np.zeros(256)
# Dumb setup. FIXME: do this cleverly
for i in range(0, 4):
for j in range(0, 4):
for k in range(0, 4):
self.ltmmdG_5335[
i * 64 + j * 16 + k * 4 +
j] = self.dangle5dG[i * 4
+ j] + self.dangle3dG[(3 - j) * 4
+ (3 - k)]
self.rtmmdG_5335[
i * 64 + j * 16 + i * 4 +
k] = self.dangle3dG[i * 4
+ j] + self.dangle5dG[(3 - k) * 4
+ (3 - i)]
self.intmmdG_5335[i * 64 + j * 16 + k * 4 +
j] = self.intmmdG[(3 - j) * 16
+ (3 - k) * 4 + i]
self.intmmdG_5335[i * 64 + j * 16 + i * 4 +
k] = self.intmmdG[i * 16
+ j * 4 + (3 - k)]
[docs] def matching_uniform(self, seqs):
ps = pairseqa(seqs)
# In both cases here, the energy we want is the NN binding energy of
# each stack,
if seqs.endtype == 'DT':
dcorr = np.zeros_like(self.dangle3dG[ps[:, 0]])
if self.danglecorr:
dcorr += -self.dangle3dG[ps[:, 0]] - self.dangle3dG[ps.revcomp(
)[:, 0]]
if self.coaxparams:
dcorr += self.coaxddG[ps[:, 0]] + self.coaxddG[ps.revcomp()[:,
0]]
elif seqs.endtype == 'TD':
dcorr = np.zeros_like(self.dangle5dG[ps[:, -1]])
if self.danglecorr:
dcorr += (-self.dangle5dG[ps[:, -1]] -
self.dangle5dG[ps.revcomp()[:, -1]])
if self.coaxparams:
dcorr += self.coaxddG[ps[:, -1]] + self.coaxddG[ps.revcomp()
[:, -1]]
else:
raise NotImplementedError
return -(np.sum(self.nndG[ps], axis=1) + self.initdG + dcorr)
[docs] def uniform(self, seqs1, seqs2, debug=False):
if seqs1.shape != seqs2.shape:
if seqs1.ndim == 1:
seqs1 = endarray(
np.repeat(np.array([seqs1]), seqs2.shape[0], 0),
seqs1.endtype)
else:
raise ValueError(
"Lengths of sequence arrays are not acceptable.")
assert seqs1.endtype == seqs2.endtype
endtype = seqs1.endtype
s1 = tops(seqs1)
s2 = tops(seqs2)
l = s1.shape[1]
# s2r is revcomp pairseq of s2.
s2r = np.fliplr(np.invert(s2) % 16)
s2r = s2r // 4 + 4 * (s2r % 4)
alloffset_max = np.zeros(
s1.shape[0]) # store for max binding at any offset
if endtype == 'TD':
s1_end = s1[:, 0:-1] #
s2_end_rc = s2r[:, 1:]
s1l = np.hstack(((4 * (s2r[:, 0] // 4) + s1[:, 0] // 4).reshape(
-1, 1), s1))
s2rl = np.hstack(
(s2r, (4 * (s2r[:, -1] % 4) + s1[:, -1] % 4).reshape(-1, 1)))
elif endtype == 'DT':
s1_end = s1[:, 1:]
s2_end_rc = s2r[:, 0:-1]
s2rl = np.hstack(((4 * (s1[:, 0] // 4) + s2r[:, 0] // 4).reshape(
-1, 1), s2r))
s1l = np.hstack(
(s1, (4 * (s1[:, -1] % 4) + s2r[:, -1] % 4).reshape(-1, 1)))
else:
raise NotImplementedError
for offset in range(-l + 2, l - 1):
if offset > 0:
if endtype == 'TD':
# Energies of matching stacks, zero otherwise. Can be used
# to check match.
ens = (s1_end[:, :-offset] == s2_end_rc[:, offset:]) * (
-self.nndG[s1_end[:, :-offset]])
ens[:, 0] += (
ens[:, 0] != 0) * (-self.dangle3dG[s1_end[:, -offset]]
) # - for positive sign
ens[:, -1] += (
ens[:, -1] != 0) * (-self.dangle3dG[s2[:, -offset - 1]]
) # - for positive sign
ltmm = -self.ltmmdG_5335[s1_end[:, :-offset] * 16
+ s2_end_rc[:, offset:]]
rtmm = -self.rtmmdG_5335[s1_end[:, :-offset] * 16
+ s2_end_rc[:, offset:]]
intmm = -self.intmmdG_5335[s1_end[:, :-offset] * 16
+ s2_end_rc[:, offset:]]
if endtype == 'DT':
ens = (s1_end[:, offset:] == s2_end_rc[:, :-offset]) * (
-self.nndG[s1_end[:, offset:]])
ens[:, 0] += (ens[:, 0] != 0) * (
-self.dangle5dG[s1_end[:, offset - 1]]
) # - for positive sign
ens[:, -1] += (ens[:, -1] != 0) * (
-self.dangle5dG[s2[:, offset]]) # - for positive sign
ltmm = -self.ltmmdG_5335[s1_end[:, offset:] * 16
+ s2_end_rc[:, :-offset]]
rtmm = -self.rtmmdG_5335[s1_end[:, offset:] * 16
+ s2_end_rc[:, :-offset]]
intmm = -self.intmmdG_5335[s1_end[:, offset:] * 16
+ s2_end_rc[:, :-offset]]
elif offset == 0:
ens = (s1_end == s2_end_rc) * (-self.nndG[s1_end])
if endtype == 'DT':
ens[:, 0] += (ens[:, 0] != 0) * (
+self.dangle3dG[s1[:, 0]] -
self.coaxparams * self.coaxddG[s1[:, 0]]) - (
s1l[:, 0] == s2rl[:, 0]
) * self.nndG[s1l[:, 0]] # sign reversed
ens[:, -1] += (ens[:, -1] != 0) * (
+self.dangle3dG[s2[:, 0]] -
self.coaxparams * self.coaxddG[s2[:, 0]]) - (
s1l[:, -1] == s2rl[:, -1]
) * self.nndG[s1l[:, -1]] # sign reversed
if endtype == 'TD':
ens[:, 0] += +(ens[:, 0] != 0) * (
self.dangle5dG[s1[:, -1]] -
self.coaxparams * self.coaxddG[s1[:, -1]]) - (
s1l[:, 0] == s2rl[:, 0]
) * self.nndG[s1l[:, 0]] # sign reversed
ens[:, -1] += +(ens[:, -1] != 0) * (
self.dangle5dG[s2[:, -1]] -
self.coaxparams * self.coaxddG[s2[:, -1]]) - (
s1l[:, -1] == s2rl[:, -1]
) * self.nndG[s1l[:, -1]] # sign reversed
ltmm = np.zeros_like(ens)
rtmm = np.zeros_like(ens)
intmm = np.zeros_like(ens)
ltmm = -self.ltmmdG_5335[s1_end[:, :] * 16 + s2_end_rc[:, :]]
rtmm = -self.rtmmdG_5335[s1_end[:, :] * 16 + s2_end_rc[:, :]]
intmm = -self.intmmdG_5335[s1_end[:, :] * 16 + s2_end_rc[:, :]]
else: # offset < 0
if endtype == 'TD':
ens = (s1_end[:, -offset:] == s2_end_rc[:, :offset]) * (
-self.nndG[s1_end[:, -offset:]])
ens[:, 0] += (ens[:, 0] != 0) * (
-self.nndG[s1[:, -1]] - p.tailcordG37 +
self.dangle5dG[s1[:, -1]]) # - for positive sign
ens[:, -1] += (ens[:, -1] != 0) * (
-self.nndG[s2[:, -1]] - p.tailcordG37 +
self.dangle5dG[s2[:, -1]]) # - for positive sign
ltmm = -self.ltmmdG_5335[s1_end[:, -offset:] * 16
+ s2_end_rc[:, :offset]]
rtmm = -self.rtmmdG_5335[s1_end[:, -offset:] * 16
+ s2_end_rc[:, :offset]]
intmm = -self.intmmdG_5335[s1_end[:, -offset:] * 16
+ s2_end_rc[:, :offset]]
elif endtype == 'DT':
ens = (s1_end[:, :offset] == s2_end_rc[:, -offset:]) * (
-self.nndG[s1_end[:, :offset]])
ens[:, 0] += (ens[:, 0] != 0) * (
-self.nndG[s1[:, 0]] - p.tailcordG37 +
self.dangle3dG[s1[:, 0]]) # - for positive sign
ens[:, -1] += (ens[:, -1] != 0) * (
-self.nndG[s2[:, 0]] - p.tailcordG37 +
self.dangle3dG[s2[:, 0]]) # - for positive sign
ltmm = -self.ltmmdG_5335[s1_end[:, :offset] * 16
+ s2_end_rc[:, -offset:]]
rtmm = -self.rtmmdG_5335[s1_end[:, :offset] * 16
+ s2_end_rc[:, -offset:]]
intmm = -self.intmmdG_5335[s1_end[:, :offset] * 16
+ s2_end_rc[:, -offset:]]
bindmax = np.zeros(ens.shape[0])
if debug:
print(offset, ens.view(np.ndarray), ltmm, rtmm, intmm)
for e in range(0, ens.shape[0]):
acc = 0
for i in range(0, ens.shape[1]):
if ens[e, i] != 0:
# we're matching. add the pair to the accumulator
acc += ens[e, i]
elif rtmm[e, i] != 0 and i > 0 and ens[e, i-1] > 0:
# we're mismatching on the right: see if right-dangling
# is highest binding so far, and continue, adding intmm
# to accumulator.
if acc + rtmm[e, i] > bindmax[e]:
bindmax[e] = acc + rtmm[e, i]
acc += intmm[e, i]
elif ltmm[e, i] != 0 and i < ens.shape[1] - 1:
# don't do this for the last pair we're mismatching on
# the left: see if our ltmm is stronger than our
# accumulated binding+intmm. If so, reset to ltmm and
# continue as left-dangling, or reset to 0 if ltmm+next
# is weaker than next dangle,or next is also a mismatch
# (fixme: good idea?). If not, continue as internal
# mismatch.
if (not self.singlepair) and (ltmm[e, i] >
acc + intmm[e, i]) and (
ens[e, i + 1] > 0):
acc = ltmm[e, i]
elif (self.singlepair) and (ltmm[e, i] >
acc + intmm[e, i]):
acc = ltmm[e, i]
else:
acc += intmm[e, i]
else: # we're at a loop. Add stuff.
acc -= p.looppenalty
bindmax[e] = max(bindmax[e], acc)
alloffset_max = np.maximum(alloffset_max, bindmax)
return alloffset_max - self.initdG