diff --git a/src/designer_dna/_oligonucleotides.pyi b/src/designer_dna/_oligonucleotides.pyi index 6bb0a89..74dd7a4 100644 --- a/src/designer_dna/_oligonucleotides.pyi +++ b/src/designer_dna/_oligonucleotides.pyi @@ -37,12 +37,9 @@ def manacher(sequence: str, dna: bool = ...) -> str: dna (bool): Sequence is DNA, else RNA. Returns: - (str): longest palindromic substring within sequence. + (str) Longest palindromic substring within a sequence. Notes: * This is a cython/c++ implementation of the O(n) Manacher's algorithm. - * This algorithm is typically slower than the O(nlogn) palindrome function for - strings up to 2^23 characters (not benchmarked beyond this limit). - * This function here is primarily here for demonstration purposes. """ diff --git a/src/designer_dna/_oligonucleotides.pyx b/src/designer_dna/_oligonucleotides.pyx index f9bf877..2df65f4 100644 --- a/src/designer_dna/_oligonucleotides.pyx +++ b/src/designer_dna/_oligonucleotides.pyx @@ -30,22 +30,36 @@ # distutils: language = c++ """Oligonucleotide functions with the help of C++.""" -from narray cimport NumericArray -from designer_dna._oligos cimport v_complement -from common cimport StringView, str_to_view -from libc.stdlib cimport free +from libc.stdlib cimport free cdef extern from "Python.h": str PyUnicode_Join(str, str) +from common cimport StringView, str_to_view +from narray cimport NumericArray + +from designer_dna._oligos cimport v_complement + cdef inline void _compute( - char* s, - char* c, + unsigned char* s, + unsigned char* c, NumericArray[int]* arr, ssize_t n, ): + """Primary computation behind manacher's algorithm. + + Args: + s (uchar*): nucleotide sequence + c (uchar*): complement of nucleotide sequence + arr (NumericArray[int]*): an array of integers + n (ssize_t): length of input sequence, s. + + Returns: + (void) relevant data saved in place to NumericArray + + """ cdef: ssize_t mirror, a, b, i, stemp, center = 0, radius = 0 int temp, zero = 0 @@ -92,13 +106,10 @@ cpdef str manacher(str sequence, bint dna = True): dna (bool): Sequence is DNA, else RNA. Returns: - (str): longest palindromic substring within sequence. + (str) Longest palindromic substring within a sequence. Notes: * This is a cython/c++ implementation of the O(n) Manacher's algorithm. - * This algorithm is typically slower than the O(nlogn) palindrome function for - strings up to 2^23 characters (not benchmarked beyond this limit). - * This function here is primarily here for demonstration purposes. """ cdef: @@ -117,11 +128,12 @@ cpdef str manacher(str sequence, bint dna = True): free(ref.ptr) free(com.ptr) - # Enumerate, capturing index (center) and value of max (radius) + # Enumerate, capturing index (center) at value of max (radius) for i in range(1, ref.size - 1): if arr[0][i] > radius: radius = arr[0][i] center = i del arr - return k[center - radius + 1: center + radius: 2] + # By nature, a palindrome is symmetrical around center (+/- radius) + return sequence[(center - radius + 1) // 2 - 1: (center + radius) // 2] diff --git a/src/designer_dna/_oligos.pxd b/src/designer_dna/_oligos.pxd index 0eff996..82ea680 100644 --- a/src/designer_dna/_oligos.pxd +++ b/src/designer_dna/_oligos.pxd @@ -30,11 +30,15 @@ from common cimport StringView cdef: - void c_reverse(char*, Py_ssize_t) + void c_reverse(unsigned char*, Py_ssize_t) void v_reverse(StringView*) - void c_complement(char*, Py_ssize_t, unsigned char*) + void c_complement(unsigned char*, Py_ssize_t, bint) void v_complement(StringView*, bint) - void c_reverse_complement(char*, Py_ssize_t, unsigned char*) + void c_reverse_complement(unsigned char*, Py_ssize_t, bint) void v_reverse_complement(StringView*, bint) + + (Py_ssize_t, Py_ssize_t) c_palindrome(unsigned char*, Py_ssize_t, bint) + int c_stretch(unsigned char*, Py_ssize_t) + int c_nrepeats(unsigned char*, int, int) diff --git a/src/designer_dna/_oligos.pyi b/src/designer_dna/_oligos.pyi index 4710980..3d87e04 100644 --- a/src/designer_dna/_oligos.pyi +++ b/src/designer_dna/_oligos.pyi @@ -27,7 +27,19 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -"""Cythonized oligonucleotide functions.""" +from array import array +from typing import Any + +def m_reverse(sequence: array[int]) -> Any: + """Reverse a nucleotide sequence. + + Args: + sequence (uchar[]): Nucleotide sequence writeable memory view. + + Returns: + (void) Reverse a sequence in place. + + """ def reverse(sequence: str) -> str: """Reverse a nucleotide sequence. @@ -46,6 +58,18 @@ def reverse(sequence: str) -> str: """ +def m_complement(sequence: array[int], dna: bool = ...) -> Any: + """Complement a nucleotide sequence. + + Args: + sequence (uchar[]): Nucleotide sequence writeable memory view. + dna (bool): Sequence is DNA, else RNA. + + Returns: + (void) Complement nucleotide sequence in place. + + """ + def complement(sequence: str, dna: bool = ...) -> str: """Complement a nucleotide sequence. @@ -64,6 +88,18 @@ def complement(sequence: str, dna: bool = ...) -> str: """ +def m_reverse_complement(sequence: array[int], dna: bool = ...) -> Any: + """Reverse complement a nucleotide sequence. + + Args: + sequence (uchar[]): Nucleotide sequence writeable memory view. + dna (bool): Sequence is DNA, else RNA. + + Returns: + (void) Reverse complement nucleotide sequence in place. + + """ + def reverse_complement(sequence: str, dna: bool = ...) -> str: """Reverse complement a nucleotide sequence. @@ -90,7 +126,7 @@ def palindrome(sequence: str, dna: bool = ...) -> str: dna (bool): Sequence is DNA, else RNA. Returns: - (str): longest palindromic subsequence within sequence. + (str) longest palindromic subsequence within sequence. Examples: .. code-block:: python @@ -98,15 +134,24 @@ def palindrome(sequence: str, dna: bool = ...) -> str: palindrome("ATAT") == "ATAT" palindrome("GATATG") == "ATAT" palindrome("ANT") == "ANT" # Handles degenerate bases - palindrome("UGCA", False) == "UGCA" # Handles RNA sequences Notes: - * Algorithmic time complexity O(NlogN). * If a sequence contains two or more palindromic substrings of equal size, the first leftmost palindrome is prioritized. """ +def m_stretch(sequence: array[int]) -> int: + """Return the maximum length of a single letter (nucleotide) repeat in a string. + + Args: + sequence (uchar[]): Nucleotide sequence writeable memory view. + + Returns: + (int) Length of maximum run of a single letter. + + """ + def stretch(sequence: str) -> int: """Return the maximum length of a single letter (nucleotide) repeat in a string. @@ -114,7 +159,7 @@ def stretch(sequence: str) -> int: sequence (str): Nucleotide sequence string. Returns: - (int): Length of maximum run of a single letter. + (int) Length of maximum run of a single letter. Examples: .. code-block:: python @@ -124,6 +169,22 @@ def stretch(sequence: str) -> int: """ +def m_nrepeats(sequence: array[int], n: int) -> int: + """Calculate the maximum observed repeats of composite pattern size n characters. + + Args: + sequence (uchar[]): Nucleotide sequence string. + n (int): Size of k-mers (composite pattern) to observe. + + Returns: + (int) The longest tandem run of nucleotides comprised of a composite pattern + of length n characters. + + Raises: + ZeroDivisionError: if value of n is 0. + + """ + def nrepeats(sequence: str, n: int) -> int: """Calculate the maximum observed repeats of composite pattern size n characters. @@ -136,7 +197,7 @@ def nrepeats(sequence: str, n: int) -> int: of length n characters. Raises: - ValueError: if value of n is less than 1. + ZeroDivisionError: if value of n is 0. Examples: .. code-block:: python diff --git a/src/designer_dna/_oligos.pyx b/src/designer_dna/_oligos.pyx index c9e2adb..d654895 100644 --- a/src/designer_dna/_oligos.pyx +++ b/src/designer_dna/_oligos.pyx @@ -30,6 +30,7 @@ # cython: boundscheck=False, wraparound=False, nonecheck=False """Cythonized oligonucleotide functions.""" +from libc.string cimport memcpy from libc.stdlib cimport free, malloc from common cimport ( @@ -43,26 +44,52 @@ cdef extern from "oligos.h": const unsigned char RNA[0x100] -cdef inline void c_reverse(char* seq, Py_ssize_t length) noexcept: +cdef inline void c_reverse(unsigned char* sequence, Py_ssize_t length) noexcept: """Reverse a C string in place. Args: - seq (char*): buffer sequence. - length (Py_ssize_t): length of seq. + sequence (uchar*): Buffer sequence. + length (Py_ssize_t): Length of sequence. + + Returns: + (void) Reverse sequence in place. """ cdef Py_ssize_t start, end, x = length // 2 for start in range(x): end = length - start - 1 - seq[start], seq[end] = seq[end], seq[start] + sequence[start], sequence[end] = sequence[end], sequence[start] cdef inline void v_reverse(StringView* view) noexcept: - """Handle reverse in place on StringView directly.""" + """Handle reverse in place on StringView directly. + + Args: + view (StringView*): Nucleotide sequence view. + + Returns: + (void) Reverse char in place. + + """ c_reverse(view[0].ptr, view[0].size) +cpdef void m_reverse(unsigned char[:] sequence): + """Reverse a nucleotide sequence. + + Args: + sequence (uchar[]): Nucleotide sequence writeable memory view. + + Returns: + (void) Reverse a sequence in place. + + """ + cdef Py_ssize_t length = sequence.shape[0] + + c_reverse(&sequence[0], length) + + cpdef str reverse(str sequence): """Reverse a nucleotide sequence. @@ -82,17 +109,20 @@ cpdef str reverse(str sequence): return sequence[::-1] -cdef void c_complement( - char* sequence, +cdef inline void _c_complement( + unsigned char* sequence, Py_ssize_t length, unsigned char* table ) noexcept: """Complement sequence C string in place. Args: - seq (char*): buffer sequence. - length (Py_ssize_t): length of seq. - table (unsigned char*): translation table. + sequence (uchar*): Nucleotide sequence pointer. + length (Py_ssize_t): Length of seq. + table (uchar*): translation table. + + Returns: + (void) Complement seq in place. """ cdef: @@ -107,12 +137,57 @@ cdef void c_complement( sequence[idx] = table[ sequence[idx]] -cdef inline void v_complement(StringView* view, bint dna) noexcept: - """Handle complement in place on StringView directly.""" +cdef void c_complement( + unsigned char* sequence, + Py_ssize_t length, + bint dna +) noexcept: + """Complement sequence C string in place. + + Args: + sequence (uchar*): Nucleotide sequence pointer. + length (Py_ssize_t): Length of seq. + dna (bint): DNA else RNA. + + Returns: + (void) Complement sequence in place. + + """ if dna: - c_complement(view[0].ptr, view[0].size, &DNA[0]) + _c_complement(sequence, length, &DNA[0]) else: - c_complement(view[0].ptr, view[0].size, &RNA[0]) + _c_complement(sequence, length, &RNA[0]) + + +cdef inline void v_complement(StringView* view, bint dna) noexcept: + """Handle complement in place on StringView directly. + + Args: + view (StringView*): Nucleotide sequence view. + + Returns: + (void) Complement char in place. + + """ + c_complement(view[0].ptr, view[0].size, dna) + + +cpdef void m_complement(unsigned char[:] sequence, bint dna = True): + """Complement a nucleotide sequence. + + Args: + sequence (uchar[]): Nucleotide sequence writeable memory view. + dna (bool): Sequence is DNA, else RNA. + + Returns: + (void) Complement nucleotide sequence in place. + + """ + cdef: + Py_ssize_t length = sequence.shape[0] + unsigned char* c_string = &sequence[0] + + c_complement(c_string, length, dna) cpdef str complement(str sequence, bint dna = True): @@ -138,21 +213,24 @@ cpdef str complement(str sequence, bint dna = True): return to_str(view) -cdef void c_reverse_complement( - char* sequence, +cdef inline void _c_reverse_complement( + unsigned char* sequence, Py_ssize_t length, unsigned char* table ) noexcept: """Reverse complement sequence C string in place. Args: - sequence (char*): buffer pointer to nucleotide char sequence. - length (Py_ssize_t): length of seq. - table (unsigned char*): translation table. + sequence (uchar*): Nucleotide sequence pointer. + length (Py_ssize_t): Length of sequence. + table (uchar*): Translation table. + + Returns: + (void) Reverse complement sequence in place. """ cdef: - char* end_ptr = sequence + (length - 1) + unsigned char* end_ptr = sequence + (length - 1) while end_ptr > sequence: sequence[0], end_ptr[0] = ( @@ -166,12 +244,56 @@ cdef void c_reverse_complement( sequence[0] = table[ sequence[0]] -cdef inline void v_reverse_complement(StringView* view, bint dna) noexcept: - """Handle reverse complement in place on StringView directly.""" +cdef void c_reverse_complement( + unsigned char* sequence, + Py_ssize_t length, + bint dna +) noexcept: + """Reverse complement sequence C string in place. + + Args: + sequence (uchar*): Nucleotide sequence pointer. + length (Py_ssize_t): Length of seq. + dna (bint): Sequence is DNA, else RNA. + + Returns: + (void) Complement seq in place. + + """ if dna: - c_reverse_complement(view[0].ptr, view[0].size, &DNA[0]) + _c_reverse_complement(sequence, length, &DNA[0]) else: - c_reverse_complement(view[0].ptr, view[0].size, &RNA[0]) + _c_reverse_complement(sequence, length, &RNA[0]) + + +cdef inline void v_reverse_complement(StringView* view, bint dna) noexcept: + """Handle reverse complement in place on StringView directly. + + Args: + view (StringView*): Nucleotide sequence view. + + Returns: + (void) Reverse complement char in place. + + """ + c_reverse_complement(view[0].ptr, view[0].size, dna) + + +cpdef void m_reverse_complement(unsigned char[:] sequence, bint dna = True): + """Reverse complement a nucleotide sequence. + + Args: + sequence (uchar[]): Nucleotide sequence writeable memory view. + dna (bool): Sequence is DNA, else RNA. + + Returns: + (void) Reverse complement nucleotide sequence in place. + + """ + cdef: + Py_ssize_t length = sequence.shape[0] + + c_reverse_complement(&sequence[0], length, dna) cpdef str reverse_complement(str sequence, bint dna = True): @@ -198,8 +320,8 @@ cpdef str reverse_complement(str sequence, bint dna = True): cdef inline void _center( - char* seq, - char* comp, + unsigned char* seq, + unsigned char* comp, Py_ssize_t* left, Py_ssize_t* right, Py_ssize_t length, @@ -227,6 +349,40 @@ cdef inline void _update_bounds( end[0] = right +cdef inline (Py_ssize_t, Py_ssize_t) c_palindrome( + unsigned char* seq, + Py_ssize_t size, + bint dna +) noexcept: + cdef: + unsigned char* com = malloc((size + 1) * sizeof(unsigned char)) + Py_ssize_t i, left, right, current, length = 0, start = 0, end = 0 + + memcpy(com, seq, size) + com[size] = "\0" + c_complement(com, size, dna) + + for i in range(size - 1): + # Check even length palindromes first (more common for ATGC based sequences) + left = i + right = i + 1 + _center(seq, com, &left, &right, size) + _update_bounds(left, right, ¤t, &length, &start, &end) + + # Only check odd length palindromes in case of (center) degenerate bases + if seq[i] != com[i]: + continue + + left = i - 1 + right = i + 1 + _center(seq, com, &left, &right, size) + _update_bounds(left, right, ¤t, &length, &start, &end) + + free(com) + + return start, end + + cpdef str palindrome(str sequence, bint dna = True): """Find the longest palindromic substring within a nucleotide sequence. @@ -235,7 +391,7 @@ cpdef str palindrome(str sequence, bint dna = True): dna (bool): Sequence is DNA, else RNA. Returns: - (str): longest palindromic subsequence within sequence. + (str) longest palindromic subsequence within sequence. Examples: .. code-block:: python @@ -243,41 +399,67 @@ cpdef str palindrome(str sequence, bint dna = True): palindrome("ATAT") == "ATAT" palindrome("GATATG") == "ATAT" palindrome("ANT") == "ANT" # Handles degenerate bases - palindrome("UGCA", False) == "UGCA" # Handles RNA sequences Notes: - * Algorithmic time complexity O(NlogN). * If a sequence contains two or more palindromic substrings of equal size, the first leftmost palindrome is prioritized. """ cdef: StringView seq = str_to_view(sequence) - StringView com = str_to_view(sequence) - Py_ssize_t i, left, right, current, length = 0, start = 0, end = 0 + Py_ssize_t start, end - v_complement(&com, dna) + start, end = c_palindrome(seq.ptr, seq.size, dna) + free(seq.ptr) - for i in range(seq.size - 1): - # Check even length palindromes first (more common for ATGC based sequences) - left = i - right = i + 1 - _center(seq.ptr, com.ptr, &left, &right, seq.size) - _update_bounds(left, right, ¤t, &length, &start, &end) + return sequence[start: end] - # Only check odd length palindromes in case of (center) degenerate bases - if seq.ptr[i] != com.ptr[i]: - continue - left = i - 1 - right = i + 1 - _center(seq.ptr, com.ptr, &left, &right, seq.size) - _update_bounds(left, right, ¤t, &length, &start, &end) +cdef inline int c_stretch( + unsigned char* sequence, + Py_ssize_t length, +) noexcept: + """Compute nucleotide stretch of a single character repeat. - free(seq.ptr) - free(com.ptr) + Args: + sequence (uchar*): buffer pointer to nucleotide char sequence. + length (Py_ssize_t): length of sequence. - return sequence[start: end] + Returns: + (int) length of longest single character repeat subsequence. + + """ + cdef: + unsigned char prev = sequence[0] + Py_ssize_t j + int current = 0, longest = 0 + + for j in range(1, length): + if sequence[j] == prev: + current += 1 + if current > longest: + longest = current + else: + prev = sequence[j] + current = 0 + + return longest + + +cpdef int m_stretch(unsigned char[:] sequence): + """Return the maximum length of a single letter (nucleotide) repeat in a string. + + Args: + sequence (uchar[]): Nucleotide sequence writeable memory view. + + Returns: + (int) Length of maximum run of a single letter. + + """ + cdef: + Py_ssize_t length = sequence.shape[0] + + return c_stretch(&sequence[0], length) cpdef int stretch(str sequence): @@ -287,7 +469,7 @@ cpdef int stretch(str sequence): sequence (str): Nucleotide sequence string. Returns: - (int): Length of maximum run of a single letter. + (int) Length of maximum run of a single letter. Examples: .. code-block:: python @@ -300,7 +482,7 @@ cpdef int stretch(str sequence): StringView view = str_to_view(sequence) Py_ssize_t j int longest = 0, current = 0 - char prev = view.ptr[0] + unsigned char prev = view.ptr[0] for j in range(1, view.size): if view.ptr[j] == prev: @@ -316,8 +498,8 @@ cpdef int stretch(str sequence): cdef inline bint _compare( - char* p, - char* q, + unsigned char* p, + unsigned char* q, Py_ssize_t start, Py_ssize_t end ) noexcept: @@ -333,7 +515,12 @@ cdef inline bint _compare( return True -cdef inline void _assign(char* src, char* dest, Py_ssize_t start, Py_ssize_t end): +cdef inline void _assign( + unsigned char* src, + unsigned char* dest, + Py_ssize_t start, + Py_ssize_t end +): """Overcome slice assignment between two char variables of different sizes.""" cdef: Py_ssize_t j, count = 0 @@ -343,7 +530,7 @@ cdef inline void _assign(char* src, char* dest, Py_ssize_t start, Py_ssize_t end count += 1 -cpdef int nrepeats(str sequence, int n): +cdef int c_nrepeats(unsigned char* sequence, int length, int n) noexcept: """Calculate the maximum observed repeats of composite pattern size n characters. Args: @@ -354,38 +541,78 @@ cpdef int nrepeats(str sequence, int n): (int) The longest tandem run of nucleotides comprised of a composite pattern of length n characters. - Raises: - ValueError: if value of n is less than 1. - - Examples: - .. code-block:: python - - nrepeats("AAAA", 1) == 3 # True - nrepeats("AAAA", 2) == 1 # True - nrepeats("ACAACAACA", 3) == 2 # True - """ cdef: - StringView view = str_to_view(sequence) Py_ssize_t t = n - Py_ssize_t v = view.size // t + Py_ssize_t v = length // t Py_ssize_t j, k int current, max_val = 0 - char* previous = malloc((t + 1) * sizeof(char)) + unsigned char* previous = malloc( + (t + 1) * sizeof(unsigned char) + ) for k in range(t): - _assign(view.ptr, previous, k, t + k) + _assign(sequence, previous, k, t + k) current = 0 for j in range(1, v): - if _compare(view.ptr, previous, j * t + k, j * t + k + t): + if _compare(sequence, previous, j * t + k, j * t + k + t): current += 1 if current > max_val: max_val = current else: current = 0 - _assign(view.ptr, previous, j * t + k, j * t + k + t) + _assign(sequence, previous, j * t + k, j * t + k + t) - free(view.ptr) free(previous) return max_val + + +cpdef int m_nrepeats(unsigned char[:] sequence, int n): + """Calculate the maximum observed repeats of composite pattern size n characters. + + Args: + sequence (uchar[]): Nucleotide sequence string. + n (int): Size of k-mers (composite pattern) to observe. + + Returns: + (int) The longest tandem run of nucleotides comprised of a composite pattern + of length n characters. + + Raises: + ZeroDivisionError: if value of n is 0. + + """ + return c_nrepeats(&sequence[0], sequence.shape[0], n) + + +cpdef int nrepeats(str sequence, int n): + """Calculate the maximum observed repeats of composite pattern size n characters. + + Args: + sequence (str): Nucleotide sequence string. + n (int): Size of k-mers (composite pattern) to observe. + + Returns: + (int) The longest tandem run of nucleotides comprised of a composite pattern + of length n characters. + + Raises: + ZeroDivisionError: if value of n is 0. + + Examples: + .. code-block:: python + + nrepeats("AAAA", 1) == 3 # True + nrepeats("AAAA", 2) == 1 # True + nrepeats("ACAACAACA", 3) == 2 # True + + """ + cdef: + StringView view = str_to_view(sequence) + int max_val + + max_val = c_nrepeats(view.ptr, view.size, n) + free(view.ptr) + + return max_val diff --git a/src/designer_dna/headers/common.pxd b/src/designer_dna/headers/common.pxd index 38b68cf..9aa7f6a 100644 --- a/src/designer_dna/headers/common.pxd +++ b/src/designer_dna/headers/common.pxd @@ -42,7 +42,7 @@ cdef extern from "Python.h": ctypedef struct StringView: - char* ptr + unsigned char* ptr Py_ssize_t size bint origin @@ -53,7 +53,7 @@ cdef inline StringView construct(bytes s, Py_ssize_t length, bint isbytes): char* buffer = PyBytes_AS_STRING(s) StringView view - view.ptr = malloc((length + 1) * sizeof(char)) + view.ptr = malloc((length + 1) * sizeof(unsigned char)) memcpy(view.ptr, buffer, length + 1) view.ptr[length] = "\0" # c string terminator view.size = length @@ -63,7 +63,7 @@ cdef inline StringView construct(bytes s, Py_ssize_t length, bint isbytes): cdef inline StringView bytes_to_view(bytes b): - """Construct StringView from python bytes object""" + """Construct StringView from python bytes object.""" cdef Py_ssize_t length = PyBytes_GET_SIZE(b) return construct(b, length, True) @@ -80,7 +80,7 @@ cdef inline StringView str_to_view(str s): cdef inline str to_str(StringView view): """Convert StringView back into a python string object, safely releasing memory.""" - cdef str obj = PyUnicode_DecodeUTF8Stateful(view.ptr, view.size, NULL, NULL) + cdef str obj = PyUnicode_DecodeUTF8Stateful( view.ptr, view.size, NULL, NULL) free(view.ptr) return obj @@ -88,7 +88,15 @@ cdef inline str to_str(StringView view): cdef inline bytes to_bytes(StringView view): """Convert StringView back into a python bytes object, safely releasing memory.""" - cdef bytes obj = PyBytes_FromStringAndSize(view.ptr, view.size) + cdef bytes obj = PyBytes_FromStringAndSize( view.ptr, view.size) free(view.ptr) return obj + + +cdef inline object to_object(StringView view): + """Convert StringView back into a python object, safely releasing memory.""" + if view.origin: + return to_bytes(view) + + return to_str(view) diff --git a/tests/unit/test_oligos.py b/tests/unit/test_oligos.py index cf588eb..6da7734 100644 --- a/tests/unit/test_oligos.py +++ b/tests/unit/test_oligos.py @@ -29,14 +29,52 @@ """Unit test oligos module.""" -from typing import Callable +from functools import wraps +from typing import Any, Callable import pytest -from designer_dna import oligos +from designer_dna import _oligos, oligos -@pytest.mark.parametrize("function", [oligos.reverse, oligos.reverse_py]) +def wrapper(f: Callable[[str], str]) -> Callable[[str], str]: + """Wrap a function which acts inplace on string.""" + + @wraps(f) + def inner(s: str, *args) -> str: + b: bytes = s.encode("utf8") + ba: bytearray = bytearray(b) + m: memoryview = memoryview(ba) + + f(m, *args) + + return ba.decode("utf8") + + return inner + + +def wrap_out(f: Callable[[str], Any]) -> Callable[[str], Any]: + """Wrap a function which uses a string, but provides a different output.""" + + @wraps(f) + def inner(s: str, *args) -> Any: + b: bytes = s.encode("utf8") + ba: bytearray = bytearray(b) + m: memoryview = memoryview(ba) + + return f(m, *args) + + return inner + + +@pytest.mark.parametrize( + "function", + [ + oligos.reverse, + wrapper(_oligos.m_reverse), + oligos.reverse_py, + ], +) @pytest.mark.parametrize( ["seq", "expected"], [ @@ -67,7 +105,11 @@ def test_reverse(seq: str, expected: str, function: Callable[[str], str]) -> Non @pytest.mark.parametrize( "function", - [oligos.complement, oligos.complement_py], + [ + oligos.complement, + wrapper(_oligos.m_complement), + oligos.complement_py, + ], ) @pytest.mark.parametrize( ["seq", "dna", "expected"], @@ -108,7 +150,12 @@ def test_complement( @pytest.mark.parametrize( - "function", [oligos.reverse_complement, oligos.reverse_complement_py] + "function", + [ + oligos.reverse_complement, + wrapper(_oligos.m_reverse_complement), + oligos.reverse_complement_py, + ], ) @pytest.mark.parametrize( ["seq", "dna", "expected"], @@ -137,7 +184,14 @@ def test_reverse_complement( assert result == expected, "Unexpected reverse complement." -@pytest.mark.parametrize("function", [oligos.stretch, oligos.stretch_py]) +@pytest.mark.parametrize( + "function", + [ + oligos.stretch, + wrap_out(_oligos.m_stretch), + oligos.stretch_py, + ], +) @pytest.mark.parametrize( ["seq", "expected"], [ @@ -156,7 +210,14 @@ def test_stretch(seq, expected: int, function: Callable[[str], int]) -> None: assert result == expected, f"Unexpected stretch calculation: {result}" -@pytest.mark.parametrize("function", [oligos.nrepeats, oligos.nrepeats_py]) +@pytest.mark.parametrize( + "function", + [ + oligos.nrepeats, + wrap_out(_oligos.m_nrepeats), + oligos.nrepeats_py, + ], +) @pytest.mark.parametrize( ["seq", "n", "expected"], [