Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions src/designer_dna/_oligonucleotides.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,9 @@ def manacher(sequence: str, dna: bool = ...) -> str:
dna (bool): Sequence is DNA, else RNA.

Returns:
(str): longest palindromic substring within sequence.
(str) Longest palindromic substring within a sequence.

Notes:
* This is a cython/c++ implementation of the O(n) Manacher's algorithm.
* This algorithm is typically slower than the O(nlogn) palindrome function for
strings up to 2^23 characters (not benchmarked beyond this limit).
* This function here is primarily here for demonstration purposes.

"""
36 changes: 24 additions & 12 deletions src/designer_dna/_oligonucleotides.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,36 @@
# distutils: language = c++

"""Oligonucleotide functions with the help of C++."""
from narray cimport NumericArray
from designer_dna._oligos cimport v_complement
from common cimport StringView, str_to_view
from libc.stdlib cimport free

from libc.stdlib cimport free

cdef extern from "Python.h":
str PyUnicode_Join(str, str)

from common cimport StringView, str_to_view
from narray cimport NumericArray

from designer_dna._oligos cimport v_complement


cdef inline void _compute(
char* s,
char* c,
unsigned char* s,
unsigned char* c,
NumericArray[int]* arr,
ssize_t n,
):
"""Primary computation behind Manacher's algorithm.

Args:
s (uchar*): nucleotide sequence
c (uchar*): complement of nucleotide sequence
arr (NumericArray[int]*): an array of integers
n (ssize_t): length of input sequence, s.

Returns:
(void) relevant data saved in place to NumericArray

"""
cdef:
ssize_t mirror, a, b, i, stemp, center = 0, radius = 0
int temp, zero = 0
Expand Down Expand Up @@ -92,13 +106,10 @@ cpdef str manacher(str sequence, bint dna = True):
dna (bool): Sequence is DNA, else RNA.

Returns:
(str): longest palindromic substring within sequence.
(str) Longest palindromic substring within a sequence.

Notes:
* This is a cython/c++ implementation of the O(n) Manacher's algorithm.
* This algorithm is typically slower than the O(nlogn) palindrome function for
strings up to 2^23 characters (not benchmarked beyond this limit).
* This function here is primarily here for demonstration purposes.

"""
cdef:
Expand All @@ -117,11 +128,12 @@ cpdef str manacher(str sequence, bint dna = True):
free(ref.ptr)
free(com.ptr)

# Enumerate, capturing index (center) and value of max (radius)
# Enumerate, capturing index (center) at value of max (radius)
for i in range(1, ref.size - 1):
if arr[0][i] > radius:
radius = arr[0][i]
center = i
del arr

return k[center - radius + 1: center + radius: 2]
# By nature, a palindrome is symmetrical around center (+/- radius)
return sequence[(center - radius + 1) // 2 - 1: (center + radius) // 2]
10 changes: 7 additions & 3 deletions src/designer_dna/_oligos.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,15 @@
from common cimport StringView

cdef:
void c_reverse(char*, Py_ssize_t)
void c_reverse(unsigned char*, Py_ssize_t)
void v_reverse(StringView*)

void c_complement(char*, Py_ssize_t, unsigned char*)
void c_complement(unsigned char*, Py_ssize_t, bint)
void v_complement(StringView*, bint)

void c_reverse_complement(char*, Py_ssize_t, unsigned char*)
void c_reverse_complement(unsigned char*, Py_ssize_t, bint)
void v_reverse_complement(StringView*, bint)

(Py_ssize_t, Py_ssize_t) c_palindrome(unsigned char*, Py_ssize_t, bint)
int c_stretch(unsigned char*, Py_ssize_t)
int c_nrepeats(unsigned char*, int, int)
73 changes: 67 additions & 6 deletions src/designer_dna/_oligos.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,19 @@
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Cythonized oligonucleotide functions."""
from array import array
from typing import Any

def m_reverse(sequence: array[int]) -> Any:
"""Reverse a nucleotide sequence.

Args:
sequence (uchar[]): Nucleotide sequence writeable memory view.

Returns:
(void) Reverse a sequence in place.

"""

def reverse(sequence: str) -> str:
"""Reverse a nucleotide sequence.
Expand All @@ -46,6 +58,18 @@ def reverse(sequence: str) -> str:

"""

def m_complement(sequence: array[int], dna: bool = ...) -> Any:
"""Complement a nucleotide sequence.

Args:
sequence (uchar[]): Nucleotide sequence writeable memory view.
dna (bool): Sequence is DNA, else RNA.

Returns:
(void) Complement nucleotide sequence in place.

"""

def complement(sequence: str, dna: bool = ...) -> str:
"""Complement a nucleotide sequence.

Expand All @@ -64,6 +88,18 @@ def complement(sequence: str, dna: bool = ...) -> str:

"""

def m_reverse_complement(sequence: array[int], dna: bool = ...) -> Any:
"""Reverse complement a nucleotide sequence.

Args:
sequence (uchar[]): Nucleotide sequence writeable memory view.
dna (bool): Sequence is DNA, else RNA.

Returns:
(void) Reverse complement nucleotide sequence in place.

"""

def reverse_complement(sequence: str, dna: bool = ...) -> str:
"""Reverse complement a nucleotide sequence.

Expand All @@ -90,31 +126,40 @@ def palindrome(sequence: str, dna: bool = ...) -> str:
dna (bool): Sequence is DNA, else RNA.

Returns:
(str): longest palindromic subsequence within sequence.
(str) Longest palindromic substring within sequence.

Examples:
.. code-block:: python

palindrome("ATAT") == "ATAT"
palindrome("GATATG") == "ATAT"
palindrome("ANT") == "ANT" # Handles degenerate bases
palindrome("UGCA", False) == "UGCA" # Handles RNA sequences

Notes:
* Algorithmic time complexity O(NlogN).
* If a sequence contains two or more palindromic substrings of equal size, the
leftmost palindrome is prioritized.

"""

def m_stretch(sequence: array[int]) -> int:
"""Return the maximum length of a single letter (nucleotide) repeat in a string.

Args:
sequence (uchar[]): Nucleotide sequence writeable memory view.

Returns:
(int) Length of maximum run of a single letter.

"""

def stretch(sequence: str) -> int:
"""Return the maximum length of a single letter (nucleotide) repeat in a string.

Args:
sequence (str): Nucleotide sequence string.

Returns:
(int): Length of maximum run of a single letter.
(int) Length of maximum run of a single letter.

Examples:
.. code-block:: python
Expand All @@ -124,6 +169,22 @@ def stretch(sequence: str) -> int:

"""

def m_nrepeats(sequence: array[int], n: int) -> int:
"""Calculate the maximum observed repeats of composite pattern size n characters.

Args:
sequence (uchar[]): Nucleotide sequence writeable memory view.
n (int): Size of k-mers (composite pattern) to observe.

Returns:
(int) The longest tandem run of nucleotides comprised of a composite pattern
of length n characters.

Raises:
ZeroDivisionError: if value of n is 0.

"""

def nrepeats(sequence: str, n: int) -> int:
"""Calculate the maximum observed repeats of composite pattern size n characters.

Expand All @@ -136,7 +197,7 @@ def nrepeats(sequence: str, n: int) -> int:
of length n characters.

Raises:
ValueError: if value of n is less than 1.
ZeroDivisionError: if value of n is 0.

Examples:
.. code-block:: python
Expand Down
Loading
Loading