"""DNA sequence"""
import string
"""
import os
import ctypes
# gcc -O2 complement.c --shared -Wl,-soname,complement
# -rdynamic -o complement.so -fPIC -I/usr/include/python2.7
from os.path import join as pj
p = os.path.abspath(os.path.dirname(__file__))
try:
cseqkit = ctypes.CDLL("complement.so")
cseqkit.dna_complement.restype = ctypes.c_char_p
except:
try:
cseqkit = ctypes.cdll.LoadLibrary(pj(p, "complement.so"))
cseqkit.dna_complement.restype = ctypes.c_char_p
except:
print("could not install lib")
"""
from biokit.sequence.seq import Sequence
# Names exported by ``from <this module> import *``; only the DNA class is public.
__all__ = ['DNA']
class DNA(Sequence):
    """a DNA :class:`~biokit.sequence.seq.Sequence`.

    You can add DNA sequences together::

        >>> from biokit import DNA
        >>> s1 = DNA('ACGT')
        >>> s2 = DNA('AAAA')
        >>> s1 + s2
        Sequence: ACGTAAAA (length 8)

    """

    def __init__(self, data=''):
        """Build a DNA sequence from *data*.

        :param data: forwarded unchanged to the parent ``Sequence``
            constructor (presumably a string of nucleotides -- confirm
            against :class:`~biokit.sequence.seq.Sequence`).
        """
        super(DNA, self).__init__(data)

        # Letters of the DNA alphabet, in both cases.
        # NOTE(review): presumably consumed by the Sequence base class for
        # validation -- confirm there.
        self.symbols = 'ACGTacgt'

        # Translation table mapping every base to its complement.
        # ``string.maketrans`` only exists on Python 2; on Python 3 the
        # attribute lookup raises AttributeError and we fall back to
        # ``bytes.maketrans``. Only that error is caught so that any other
        # failure is not silently hidden.
        try:
            self._translate = string.maketrans('ACGTacgt', 'TGCAtgca')
        except AttributeError:
            self._translate = bytes.maketrans(b'ACGTacgt', b'TGCAtgca')

        self._type = 'DNA'

    def get_complement(self):
        """Return a new :class:`DNA` holding the complement of this sequence.

        Each base is swapped with its partner (A<->T, C<->G), preserving
        case; the order of the bases is not changed.
        """
        compl = self._data.translate(self._translate)
        return DNA(compl)
    complement = property(get_complement)

    def get_reverse_complement(self):
        """Return a new :class:`DNA` holding the reverse complement.

        The sequence is first complemented (see :meth:`get_complement`)
        and the result is then reversed.
        """
        complement = self.get_complement()
        return DNA(complement._data[::-1])
    reverse_complement = property(get_reverse_complement)

    # Disabled experimental C implementation of the complement (relies on
    # the ctypes loader that is also disabled at the top of the module).
    #def _get_complement_in_c(self):
    #    return cseqkit.dna_complement(self.sequence, len(self.sequence))
    #complement2 = property(_get_complement_in_c)
    #
    #def get_complement_c(self):
    #    print("Experimetal. Not faster than Python...")
    #    return cseqkit.dna_complement(self._data, self._N)

    def gc_content(self, letters='CGS'):
        """Returns the G+C content in percentage.

        Copes mixed case sequences, and with the ambiguous nucleotide S (G or C)
        when counting the G and C content.

        :param letters: iterable of the letters to count; each one is
            counted in both its upper and lower case form.
        :return: ``100 * (number of matching letters) / (sequence length)``
            as a float; 0.0 for an empty sequence.

        ::

            >>> from biokit.sequence.dna import DNA
            >>> d = DNA("ACGTSAAA")
            >>> d.gc_content()
            37.5

        """
        # Guard against a division by zero on an empty sequence: the count
        # is necessarily 0 there, so any non-zero denominator gives 0.0.
        if len(self) == 0:
            denom = 1.
        else:
            denom = float(self._N)

        # Count each requested letter once per case; the set removes
        # duplicates so that no letter is counted twice.
        wanted = set(x.upper() for x in letters)
        wanted.update(x.lower() for x in letters)
        counter = sum(self._data.count(x) for x in wanted)
        return 100. * counter / denom

    def get_rna(self):
        """Return the sequence as an ``RNA`` object.

        Every ``T`` is replaced by ``U`` and every ``t`` by ``u``; the
        current object is left untouched.
        """
        # Imported here rather than at module level.
        # NOTE(review): presumably to avoid a circular import between the
        # dna and rna modules -- confirm.
        from biokit.sequence.rna import RNA
        # here a copy is made
        seq = self._data.replace('T', 'U')
        seq = seq.replace('t', 'u')
        return RNA(seq)