Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand All @@ -33,8 +33,11 @@ jobs:
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
# Write the JUnit report to the per-version path that the upload step
# publishes (junit/test-results-<python-version>.xml), and measure coverage of
# the ogilo package rather than the template placeholder "com".
- name: Test with pytest
  run: |
    pytest ogilo --doctest-modules --junitxml=junit/test-results-${{ matrix.python-version }}.xml --cov=ogilo --cov-report=xml --cov-report=html
- name: Upload pytest test results
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: pytest-results-${{ matrix.python-version }}
path: junit/test-results-${{ matrix.python-version }}.xml
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8"]
python-version: ["3.11"]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v3
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand Down
3 changes: 3 additions & 0 deletions ogilo/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""Expose the version of the installed ``ogilo-array`` distribution as ``__version__``."""

from importlib.metadata import PackageNotFoundError, version

try:
    __version__ = version("ogilo-array")
except PackageNotFoundError:
    # No distribution metadata is available (e.g. the package is imported from
    # a source checkout that was never installed). Fall back to a placeholder
    # so that importing the package, and the CLI that imports __version__,
    # does not fail outright.
    __version__ = "0+unknown"
13 changes: 8 additions & 5 deletions ogilo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import streq as sq

from . import __version__
from .io import extract_col, write_constructs
from .types import Input, Oligo, Seq
from .utils import _get_pcr_handles, grouping_key
Expand Down Expand Up @@ -144,9 +145,11 @@ def _assemble(args: argparse.Namespace) -> None:
constructs = _generate_combos(constructs)

if args.pcr_handles:
constructs = _add_pcr_handles(constructs,
handle_set=args.handle_set,
grouped=True)
constructs = _add_pcr_handles(
constructs,
handle_set=args.handle_set,
grouped=True,
)

write_constructs(constructs, args.output)

Expand Down Expand Up @@ -185,7 +188,7 @@ def main() -> None:
Sequence for the oligo.

For example:
$ ogilo re:BsmBI:f file:test/guides-RLC12_mapped-tiny.tsv:guide_sequence:Name:ann_gene_biotype re:BsmBI:r -p
$ ogilo re:BsmBI file:test/guides-RLC12_mapped-tiny.tsv:guide_sequence:Name:ann_gene_biotype @re:BsmBI -p
group pcr_handles length mnemonic restriction_sites oligo_name oligo_sequence
rRNA sans18a 74 brief_nadia BsmBI_f-_up-rrs-1471818-ultimate_parody-BsmBI_r AGGCACTTGCTCGTACGACGcgtctcAACCCAAACACTCCCTTTGGAAgagacgATGTGGGCCCGGCACCTTAA
rRNA sans18a 73 rancid_kayak BsmBI_f-_up-rrs-1471818-nostalgic_sonata-BsmBI_r AGGCACTTGCTCGTACGACGcgtctcACCCAAACACTCCCTTTGGAAgagacgATGTGGGCCCGGCACCTTAA
Expand Down Expand Up @@ -225,7 +228,7 @@ def main() -> None:
parser.add_argument('--handle_set', '-s',
type=str,
default='all',
help='Which set of PCR handles to use. Either "all", "sanson2018", "illumina", or '
help='Which set of PCR handles to use. Either "all", "sanson2018", "illumina", "subramanian2018", "winston2022", or '
'a path to a CSV with column headings pcr_handle_id, pcr_handle_f, pcr_handle_r. '
'Default: %(default)s')
parser.add_argument('--format', '-f',
Expand Down
168 changes: 168 additions & 0 deletions ogilo/data/handles/raw/subramanian2018.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# https://doi.org/10.1093/synbio/ysx008
pcr_handle_id,is_forward,to_reverse_complement,sequence
1,yes,no,AAACACGTGGCAAACATTCC
2,yes,no,AAACCGGAGCCATACAGTAC
3,yes,no,AAAGCACTCTTAGGCCTCTG
4,yes,no,AAAGGGGCCGTCAATATCAG
5,yes,no,AAATAAGACGACGACCCTCG
6,yes,no,AACGATGATGCTCACTCTCG
7,yes,no,AAGAATTACTGACCCCTCGG
8,yes,no,AAGACGATCCGAGCCATTAC
9,yes,no,AAGGAACTATGGCATCGAGC
10,yes,no,AAGGACTGCATACCAGGTTG
11,yes,no,AAGGATATGTAGACACCGCC
12,yes,no,AAGGCCCAGAAGGATACAAC
13,yes,no,AAGGCGCTCGGATAATACTC
14,yes,no,AAGGTATGTATAGCGACCGC
15,yes,no,AATAGGAACCTCTTACGCGG
16,yes,no,AATATCACGCAAAAGCACCG
17,yes,no,AATCAGTTTCTTTGGCAGCC
18,yes,no,AATGCAAAGCTATTAGCGCG
19,yes,no,AATGCGTCATTTTACACGGC
20,yes,no,AATGTCCTTAGGCAGTCGTC
21,yes,no,ACAACGAGCAGACCGAATAG
22,yes,no,ACAAGGAGTCGGCATATCAC
23,yes,no,ACAGAACGAACAGGCACTAC
24,yes,no,ACAGGAAGCAAGGTATACGC
25,yes,no,ACAGGGTATATTGAGTGCCC
26,yes,no,ACATAAGCGATCCCAAGGTC
27,yes,no,ACATCGCATACCAGAACAGG
28,yes,no,ACATTAAATTTCGCCGTGGC
29,yes,no,ACCACAGGTCAAGATTCACG
30,yes,no,ACCCGTATCGCATAAGGATG
31,yes,no,ACGAGATGATGCACCGATAG
32,yes,no,ACGATGGGGACATAGAACAC
33,yes,no,ACGGAGCCCTTATTGTAACC
34,yes,no,ACGTATGGGGAACACTACAC
35,yes,no,ACGTGAAACTGTATCGAGCC
36,yes,no,ACGTTCAGTTTTCCAATGGC
37,yes,no,ACTAGATTAGCAAGGCACCC
38,yes,no,ACTGGACCCAATAAAAGGCC
39,yes,no,ACTTCGATTGGCAAGGACTG
40,yes,no,AGAACATAGCATTCACGGGG
41,yes,no,AGACAACAATCTGAGGCTGG
42,yes,no,AGACAAGCCTTAACCGTAGG
43,yes,no,AGACACAAGGCTGATTCCAG
44,yes,no,AGACATGGGATTGACCACAC
45,yes,no,AGAGAGGCATGATTGACCTC
46,yes,no,AGAGTTGCACCTAGAATCCG
47,yes,no,AGATAGATGCTCCGTCAAGC
48,yes,no,AGATAGTCACGCACAAGACC
49,yes,no,AGATTAGCCGACTTTCCTGG
50,yes,no,AGATTAGCTGCCGATACTGG
51,yes,no,AGATTGTTACTCCGACGGAC
52,yes,no,AGATTTCCGACGAGATTCCC
53,yes,no,AGCATCCGTCTAAATCTCGG
54,yes,no,AGCTATAAGAATTGCCGGGC
55,yes,no,AGCTATGATCCCGGTGTAAC
56,yes,no,AGCTCAATCTAACAGTGGGG
57,yes,no,AGGACACCAGACCAATGAAG
58,yes,no,AGGGCTAATTACCATCAGCG
59,yes,no,AGGTGATCTGACGAATGTCC
60,yes,no,AGTAAAGCATAGTGCCCAGC
61,yes,no,AGTAGTATCCGAATCGCTGC
62,yes,no,AGTATCTCAGCAAGGGCAAC
63,yes,no,AGTATTAGGCGTCAAGGTCC
64,yes,no,AGTATTCTTACAGCCAGCCG
65,yes,no,AGTATTGCCGGACTAAACCC
66,yes,no,AGTCCCAAGTTCAGACGTAC
67,yes,no,AGTCCGACACAATGTGACAC
68,yes,no,AGTGAACTGACCGAATCCTC
69,yes,no,AGTGGTCTGTAAACCGTACC
70,yes,no,AGTGTTTTCCATTTTCCGCG
71,yes,no,AGTTATAAGGGTCCGATGCC
72,yes,no,AGTTGCAGTATCTAACCCGC
73,yes,no,AGTTGTAATATCACCCGCGC
74,yes,no,ATACGTGGCTAGCATGAGAC
75,yes,no,ATACTGTAAGAACCACGCGG
76,yes,no,ATAGATCATGTCGGCAGTCG
77,yes,no,ATAGATGGTGCCTACATGCG
78,yes,no,ATCACAACAAAGGACGGGTC
79,yes,no,ATCAGACAACACAGAGGCTG
80,yes,no,ATCCAGGAGGTCTAGGAACC
81,yes,no,ATCCTAGAAAAGGCGAAGGC
82,yes,no,ATGCCATGACGACAACTAGC
83,yes,no,ATGCTAGCTGGAACTATCGG
84,no,yes,ATTAGGATTGCGAGCGACAC
85,no,yes,ATTAGTACACTCCGTGAGCG
86,no,yes,ATTCAAGGGTTGGACGACTC
87,no,yes,ATTCTCACGACGCAAGATGG
88,no,yes,ATTGACGGGAACTACACTCG
89,no,yes,CACTCGATAGGTACAACCGG
90,no,yes,CAGACCTACGGATCTTAGCG
91,no,yes,CCACGAGATAAGAGGATGGC
92,no,yes,CCAGAGCTTAGGGGACATAC
93,no,yes,CCCGAGGGGAGAAATATACC
94,no,yes,CCGAGGGAACCATGATACAG
95,no,yes,CCGGGAGGAAGATATAGCAC
96,no,yes,CCGGTTGTACCTATCGAGTG
97,no,yes,CCGTGCGACAAGATTTCAAG
98,no,yes,CCTTTAACAGGACATGCAGC
99,no,yes,CGAACGCAAAAGTCCTCAAG
100,no,yes,CGATAGAACGACCAGGTAGC
101,no,yes,CGGATCGAACTTAGGTAGCC
102,no,yes,CGGGAGGAAGTCTTTAGACC
103,no,yes,CTAATATCCCTGAGCGACGG
104,no,yes,CTAGGGAACCAGGCTTAACG
105,no,yes,CTAGGGGATGGTCCAATACG
106,no,yes,CTATAGAATCCGGGCTGGTC
107,no,yes,CTGCTAGGGGCTACTTATCG
108,no,yes,GAAAAGTCCCAATGAGTGCC
109,no,yes,GAAGTGGTTTGCCTAAACGC
110,no,yes,GACCATGCAAGGAGAGGTAC
111,no,yes,GATACATAGACTTGGCCCCG
112,no,yes,GCACGCAAAAGGACATAACC
113,no,yes,GCAGCGTTTTAGCCTACAAG
114,no,yes,GCATAAAGTTGACAGGCCAG
115,no,yes,GCTAAATAGAGGGAAGCCCC
116,no,yes,GGAAAACTAAGACAAGGCGC
117,no,yes,GGAAACAATAACCATCGGCG
118,no,yes,GGGCACCGATTAAGAAATGC
119,no,yes,GGGTTGTCTCCTCTGATAGC
120,no,yes,GTACTCAGAGATTGCCGGAG
121,no,yes,GTATAAGATCAGCCGGACCC
122,no,yes,GTATGTCGGCTCTCGTATCG
123,no,yes,GTTCAGAGGTACGAACCCTC
124,no,yes,GTTGCATCTAAGCCAAGTGC
125,no,yes,TAAAGAGAGGGCGTCCAATC
126,no,yes,TAACGACGTGCCGAACTTAG
127,no,yes,TAAGATAGCACCACGGATGG
128,no,yes,TAAGGATTCATCAGGTGCGC
129,no,yes,TAAGGGACGATGCTTAACCC
130,no,yes,TACCACGAAATGCACAGGAG
131,no,yes,TACTGATAATTCGGACGCCC
132,no,yes,TACTTGAATACCACGTGGCC
133,no,yes,TAGCCAGGCAAAAGAGATCC
134,no,yes,TAGCTCGATAATCAAGGGGC
135,no,yes,TAGTGACCTAATGCCATGGG
136,no,yes,TAGTTGAGAACACGAACCCG
137,no,yes,TATAACAGGCTGCTGAGACC
138,no,yes,TATACTGAAGAACGGCCCAG
139,no,yes,TATCAATCCGGAACCAGTGC
140,no,yes,TATCACGGAAGGACTCAACG
141,no,yes,TCAAAGGAGCACGAACCTAC
142,no,yes,TCAAGGTCCGTTATGGAACC
143,no,yes,TCACATAGAAGGACATGGCG
144,no,yes,TCACTTGGTATCGAGAACGG
145,no,yes,TCAGCCTTTCATTGATTGCG
146,no,yes,TCATCGACAAGATACAGGCG
147,no,yes,TCCAATTATACGGAGCAGGC
148,no,yes,TCGAATATGCTGTAACCCCG
149,no,yes,TCGACCAGGTTATCATGAGC
150,no,yes,TCGAGACAAGAACGATTCCC
151,no,yes,TCTAGGACTATCACCGGAGG
152,no,yes,TCTTCATAAGCCAGAGTGCC
153,no,yes,TCTTGCGATAGACACAAGCC
154,no,yes,TGAGCCATAAAAGCAAAGCG
155,no,yes,TGAGCGCAGAACTATCAGAC
156,no,yes,TGCATAGTATCCCAACAGGG
157,no,yes,TGCCAAAGGGTAGAGACATC
158,no,yes,TGCTGAATGAGAAACCTCGG
159,no,yes,TGGGGACGACTTATAATGCC
160,no,yes,TGTGGACCCTATCAAACGAG
161,no,yes,TTAGCTCAGGTCCAAAGTCC
162,no,yes,TTAGTAGGCAAGCATACCCG
163,no,yes,TTCGGGAGCGGATTATACAC
164,no,yes,TTCTGGGACTGGATAACACG
165,no,yes,TTGACAGACAATCCGTAGGC
universal,no,yes,CTTCTCCTTTACTAGTGAATTC
20 changes: 20 additions & 0 deletions ogilo/data/handles/raw/winston2022.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# https://doi.org/10.1021/acssynbio.1c00482
pcr_handle_id,is_forward,to_reverse_complement,sequence
FP1,yes,no,CTATCTGGCCTCAAAGCACT
FP2,yes,no,TCTGGCAGCACGAAGATAAG
FP3,yes,no,AGGAGACAAGTAACGTGTGC
FP4,yes,no,TAAGAAAGGCATCGTCACCG
FP5,yes,no,GTCACGTCACCAGGTAACAA
FP6,yes,no,TTTCCGATAGTTGAGGCAGG
FP7,yes,no,TGGTGAAACTACCGACTTCC
FP8,yes,no,AGTTGGTGACTATCCGTCCT
FP9,yes,no,CACATAGGCAAAGCGGAGTA
RP1,no,no,TTGACTCCTCCTCTTGCCTA
RP2,no,no,CGACTCAATTCTGCTCCTGT
RP3,no,no,GTCGAGCACTGATTGTGGAA
RP4,no,no,GTACTGCTCGGCCACTTATT
RP5,no,no,GCGTTGTCTCTAGCGAAAGA
RP6,no,no,TCATTTCTCCGACAGGCTTG
RP7,no,no,CCTCTCTTCGCGTTGATCTT
RP8,no,no,GCAAGACAATAGGCTTCGGT
RP9,no,no,CGCTGCTGGTAATTTAACCG
63 changes: 63 additions & 0 deletions ogilo/data/handles/scripts/compile-table.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env bash

# Print a delimited table sorted on one column, keeping the header row first.
#
# Arguments:
#   $1  path to the table file
#   $2  column selector, passed through to get_column_number
#   $3  field separator (optional, defaults to ",")
#
# NOTE(review): get_column_number is not defined in this script; it must be
# provided by the calling environment. Presumably it prints the 1-based index
# of the selected column -- confirm before relying on this function.
sort_table () {
    local sep=${3:-,}
    # Declare and assign separately so a failure of get_column_number is not
    # masked by the exit status of `local`.
    local field
    field=$(get_column_number "$1" "$2")
    head -n1 "$1"
    # Restrict the key to the selected column only; a bare -kN would extend
    # the key from that field to the end of the line.
    tail -n+2 "$1" | sort -t"$sep" -k"$field,$field"
}

# Compile every raw handle table in ../raw/*.csv into ../<id>-pcr-handles.csv.
#
# Each raw table has the columns
#   pcr_handle_id,is_forward,to_reverse_complement,sequence
# and may contain '#' comment lines, a UTF-8 BOM and CRLF line endings.
# The output pairs every forward handle with every reverse handle and has the
# columns pcr_handle_id,pcr_handle_f,pcr_handle_r.

# Abort on the first failing command, including one in the middle of a
# pipeline, and trace every command.
set -e
set -o pipefail
set -x

script_dir="$(dirname -- "$0")"
output_dir="$script_dir"/..
temp_dir="$script_dir"/../temp

# With nullglob an empty raw/ directory yields an empty list instead of the
# literal, non-existent pattern.
shopt -s nullglob
inputs=("$script_dir"/../raw/*.csv)
shopt -u nullglob

if [ "${#inputs[@]}" -eq 0 ]
then
    echo "No raw handle tables found in $script_dir/../raw" >&2
    exit 1
fi

mkdir -p "$temp_dir"
# Remove the scratch directory however the script ends, so an aborted run
# does not leave stale intermediate files behind.
trap 'rm -rf -- "$temp_dir"' EXIT

for inp in "${inputs[@]}"
do
    inp_id=$(basename "$inp" .csv)
    this_output_file="$output_dir"/"$inp_id"-pcr-handles.csv
    pp_seq_file="$temp_dir"/"$inp_id"-preprocessed.csv
    fw_seq_file="$temp_dir"/"$inp_id"-fw.csv
    rv_seq_file="$temp_dir"/"$inp_id"-rv.csv

    # Strip a leading UTF-8 BOM, carriage returns and comment lines.
    sed '1s/^\xEF\xBB\xBF//' "$inp" | tr -d $'\r' | grep -v '^#' > "$pp_seq_file"

    # Append a fifth column holding the reverse complement of column 4.
    cat \
        <(paste -d, \
            <(head -n1 "$pp_seq_file") \
            <(echo "reverse_complement")) \
        <(paste -d, \
            <(tail -n+2 "$pp_seq_file") \
            <(tail -n+2 "$pp_seq_file" | cut -d, -f4 | tr ATCG TAGC | rev)) \
        > "$pp_seq_file".temp
    mv "$pp_seq_file".temp "$pp_seq_file"

    # Reduce to: __join__,pcr_handle_id,is_forward,sequence
    # The sequence is the reverse complement when to_reverse_complement is
    # "yes" and the original when it is "no"; rows with any other value are
    # dropped. The constant "__join__" key lets join(1) below build the
    # full cross product of forward and reverse handles.
    awk -F, -v OFS=, '
        NR == 1     { print "__join__", $1, $2, $(NF-1); next }
        $3 == "yes" { print "__join__", $1, $2, $NF }
        $3 == "no"  { print "__join__", $1, $2, $(NF-1) }
    ' "$pp_seq_file" > "$pp_seq_file".temp
    mv "$pp_seq_file".temp "$pp_seq_file"

    # Forward handles: rows whose is_forward column (now field 3) is "yes".
    awk -F, -v OFS=, \
        'NR == 1 { print $1,$2,"pcr_handle_f" } (NR > 1 && $3 == "yes") { print $1,$2,$NF }' \
        "$pp_seq_file" \
        > "$fw_seq_file"

    # Reverse handles: rows whose is_forward column is "no".
    awk -F, -v OFS=, \
        'NR == 1 { print $1,$2,"pcr_handle_r" } (NR > 1 && $3 == "no") { print $1,$2,$NF }' \
        "$pp_seq_file" \
        > "$rv_seq_file"

    # Cross-join forward x reverse, drop the join key, and build the pair id
    # as <table>-<forward id>.<reverse id>.
    join --header -1 1 -2 1 -t, \
        "$fw_seq_file" "$rv_seq_file" \
        | cut -d, -f2- \
        | awk -F, -v OFS=, -v inp_id="$inp_id" \
            'NR == 1 { print $1,$2,$4 } NR > 1 { print inp_id"-"$1"."$3,$2,$4 }' \
        > "$this_output_file"

done
Loading
Loading