From 549af0cdbae090d3dcbca5c35a4a6064137a05f9 Mon Sep 17 00:00:00 2001
From: ykeren6 <yair.keren@epfl.ch>
Date: Wed, 3 Apr 2024 14:42:34 +0200
Subject: [PATCH 1/9] added uniformed docstrings and pytests

---
 .gitignore                                    | 160 ++++++++++
 README.md                                     |   1 +
 access_genbank.py                             | 118 +++++++
 process_fastq.py                              | 287 ++++++++++++++++++
 rbcl_Qiagen_tomato_5000.fastq.png             | Bin 0 -> 24917 bytes
 run_command.py                                |  64 ++++
 .../rbcL_Qiagen_tomato_5000_test_file.fastq   |   8 +
 test_utils.py                                 |  47 +++
 8 files changed, 685 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 access_genbank.py
 create mode 100644 process_fastq.py
 create mode 100644 rbcl_Qiagen_tomato_5000.fastq.png
 create mode 100644 run_command.py
 create mode 100644 test_material/rbcL_Qiagen_tomato_5000_test_file.fastq
 create mode 100644 test_utils.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..68bc17f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d581ac4
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# utils
\ No newline at end of file
diff --git a/access_genbank.py b/access_genbank.py
new file mode 100644
index 0000000..32ef2a8
--- /dev/null
+++ b/access_genbank.py
@@ -0,0 +1,118 @@
+from Bio import SeqIO, Entrez
+import pandas as pd
+import os.path as ospath
+
+Entrez.email = 'contact@genorobotics.org'
+
+#-------------------------------
+#extract information from ncbi
+#-------------------------------
+
+def build_search_term(gene_name, min_len=0, max_len=-1):
+    """ generate a search query for NCBI's nucleotide database (GenBank). Bio.Entrez does not provide filters such as sequence length and gene name
+    so a search query keywords must be used instead.
+    Used to make database queries in extract_database and download_database
+    
+    Parameters
+    ----------
+        gene_name: (str) name of the gene of interest
+        min_len: (int, optional) lower bound of the sequence length filter, by default 0
+        max_len: (int, optional) upper bound of the sequence length filter, by default -1 means no limit
+    
+    Returns
+    ----------
+        term: (str) search query
+    """
+    term = f"{gene_name}[Gene Name]"
+    if max_len == -1:
+        term += f" NOT 0:{min_len}[Sequence Length]"
+    else:
+        term += f" AND {min_len}:{max_len}[Sequence Length]"
+    return term
+
+
+def extract_database(gene_name, seqlen_start=0, seqlen_stop=-1):
+    '''Request data on GenBank from NCBI (with Entrez module), with filtering options
+
+    Parameters
+    ----------
+        gene_name: (str) name of the gene to extract from GenBank
+        seqlen_start: (int, optional) lower bound of the sequence length filter, by default 0
+        seqlen_stop: (int, optional) upper bound of the sequence length filter, by default -1 means no limit
+
+    Returns
+    ----------
+        seq_record: (str) raw data of all sequences
+    '''
+    handle = Entrez.esearch(db= "nucleotide", term= build_search_term(gene_name, seqlen_start, seqlen_stop), retmax = 10000)
+    record = Entrez.read(handle)
+    matching_requests = record["IdList"]
+    request_counter = 0
+    while len(record["IdList"]) == 10000:
+        request_counter += 1
+        handle = Entrez.esearch(db= "nucleotide", term= build_search_term(gene_name, seqlen_start, seqlen_stop)
+                                , retstart = request_counter*10000,  retmax = 10000)
+        record = Entrez.read(handle)
+        matching_requests += record["IdList"]
+    seq_handle = Entrez.efetch(db="nucleotide", id=matching_requests, retmode = "fasta", rettype = "fasta")
+    seq_record = seq_handle.read()
+    return seq_record
+
+
+def download_database(gene_name, seqlen_start=0, seqlen_stop=-1):
+    '''Download data to a .fasta file, with filtering options
+
+    Args:
+        gene_name: (str) name of the gene to extract from GenBank
+        seqlen_start: (int) lower bound of the sequence length filter
+        seqlen_stop: (int) upper bound of the sequence length filter
+
+    Returns:
+        data_path: (str) path to downloaded data .fasta file
+    '''
+    data_path = f"{gene_name}[{seqlen_start},{seqlen_stop}].fasta"
+    with open(data_path, mode="w") as file:
+        file.write(extract_database(gene_name, seqlen_start, seqlen_stop))
+    return data_path
+
+def download_sequence(species, gene_name, dst, start_length=None, stop_length= None, id = None, permissive_search = True):
+    """
+    download sequence from GenBank through the Entrez database. 
+
+    Parameters:
+    ----------
+    species(str): name of species
+    gene_name(str): name of gene
+    dst(str,Path-like): destination file path
+    start_length(int): minimum length of sequence
+    stop_length(int): maximum length of sequence
+    id(list): list of NCBi ids of sequences to download. If provided, overrides gene_name and species.
+    permissive_search(bool, default = True): when True, if Advanced NCBI query returns nothing, replace it with a less precise general query.
+    """
+    
+    if id == None:
+        search_term = f"{gene_name}[Gene Name] AND {species}[Organism]"
+        if start_length!= None or stop_length!= None:
+                search_term += f" {start_length}:{stop_length}[Sequence Length]"
+        handle = Entrez.esearch(db = "nucleotide", term = search_term, retmax = 1, idtype = "acc")
+        search_result = Entrez.read(handle)
+        handle.close()
+        id = search_result["IdList"]
+        n=0
+        for i in id:
+            n+=1
+        if n==0 and permissive_search:
+            search_term = f"{gene_name} {species} {start_length}:{stop_length}[Sequence Length]"
+            handle = Entrez.esearch(db = "nucleotide", term = search_term, retmax = 1, idtype = "acc")
+            search_result = Entrez.read(handle)
+            handle.close()
+            id = search_result["IdList"]
+    n=0
+    for i in id:
+        n+=1
+    if n==1:
+        handle = Entrez.efetch(db="nucleotide", id=id, retmode = "fasta", rettype = "fasta")
+        sequence = handle.read()
+        handle.close()
+        with open(dst, mode="a") as writer:
+            writer.write(sequence)
\ No newline at end of file
diff --git a/process_fastq.py b/process_fastq.py
new file mode 100644
index 0000000..7b794a8
--- /dev/null
+++ b/process_fastq.py
@@ -0,0 +1,287 @@
+import glob, gzip, math, random
+import numpy as np
+from Bio import SeqIO, Align
+import os.path as ospath
+import os
+import gzip
+import shutil
+import sys 
+import matplotlib.pyplot as plt
+
+
+#----------------------------
+#DEAL WITH FASTQ FILES
+#----------------------------
+
+def extract_gz(src_file, dst_file):
+    """
+    Extracts a gzipped file.
+
+    Args:
+        src_file (str): Path to the source gzipped file.
+        dst_file (str): Path to the destination file.
+
+    Returns:
+        None
+    """
+    with gzip.open(src_file, 'rb') as f_in:
+        with open(dst_file, 'wb') as f_out:
+            shutil.copyfileobj(f_in, f_out)
+
+
+
+def read_fastq(fastq_filepath=None):
+    """
+    Read a fastq file and return the reads.
+    
+    Args:
+        fastq_filepath (str): filepath of the .fastq file
+    Returns: 
+        reads(list of Bio.SeqRecord.SeqRecord): reads from the fastq file
+    """
+    if fastq_filepath is None: fastq_filepath = "data/rbcL_Qiagen_tomato.fastq" # default path (example)
+    if fastq_filepath.lower().endswith('.gz'):
+        f = gzip.open(fastq_filepath, 'rt')
+    else:
+        f = open(fastq_filepath, 'rt')
+    reads = []
+    for read in SeqIO.parse(f, "fastq"):
+        reads.append(read)
+    return reads
+
+
+
+def split_fastq(input_path, output_dir, base_name, percentile=20):
+    """
+    Split a FASTQ file into two separate files based on sequence length.
+
+    Args:
+        input_path (str): Path to the input FASTQ file.
+        output_dir (str): Directory where the output files will be saved.
+        base_name (str): Base name for the output files.
+        percentile (int, optional): The percentile value to split the sequences. Defaults to 20.
+
+    Returns:
+        tuple: A tuple containing the paths to the top percentile sequences file and the remaining sequences file.
+    """
+    # Read sequences and sort by length
+    sequences = list(SeqIO.parse(input_path, "fastq"))
+    sequences.sort(key=lambda x: len(x), reverse=True)
+
+    # Split into top percentile and remaining sequences
+    split_index = len(sequences) * percentile // 100
+    top_sequences = sequences[:split_index]
+    remaining_sequences = sequences[split_index:]
+
+    # Write to separate files
+    top_sequences_path = os.path.join(output_dir, f"{base_name}_top{percentile}.fastq")
+    remaining_sequences_path = os.path.join(output_dir, f"{base_name}_remaining{100-percentile}.fastq")
+    SeqIO.write(top_sequences, top_sequences_path, "fastq")
+    SeqIO.write(remaining_sequences, remaining_sequences_path, "fastq")
+
+    return top_sequences_path, remaining_sequences_path
+
+
+def concatenate_fastq(src_folder=None, dst=None):
+    """
+    Concatenate all .fastq from a folder into a single .fastq file.
+    Args:
+        folder (str): folder containing the .fastq files (usually fastq_pass folder)
+        dst (str): destination file (.fastq)
+    Returns:
+        None
+    """
+    if src_folder is None: src_folder = "fastq_pass" # default folder (example)
+    if dst is None: dst = f"{src_folder}/concatenation.fastq" # default destination (example)
+    if dst[-6:] != ".fastq": dst = f"{dst}.fastq"
+    def get_file_iterator(filename):
+        if filename.lower().endswith('.gz'):
+            f = gzip.open(filename, 'rt')
+        else:
+            f = open(filename, 'rt')
+        return f
+    fastq_list = [fastq for fastq in glob.glob(f"{src_folder}/*.fastq*")]
+    fastq_iterators = [SeqIO.parse(get_file_iterator(fastq), "fastq") for fastq in fastq_list]
+    while True:
+        for fq in fastq_iterators:
+            try:
+                SeqIO.write(next(fq), open(dst,"at"), "fastq")
+            except StopIteration:
+                fastq_iterators.remove(fq)
+        if len(fastq_iterators) == 0:
+            break
+
+
+def extract_fastq(main_dir, sample_nb, dst):
+    """
+    Extract all fastq files under a certain barcode/sample number from an expedition results folder.
+
+    Args:
+        main_dir(str): expedition folder path
+        sample_nb(int): sample number for which to extract fastq. ie 6 to extract from folder barcode06
+        dst(str): destination file path
+    Returns:
+        None
+    """
+    for root, dirs, files in os.walk(main_dir):
+        if "fastq_pass" in dirs:
+            pass_dir = ospath.join(root, "fastq_pass")
+            break
+    for root, dirs, files in os.walk(pass_dir):
+        if f"barcode{sample_nb}" in dirs:
+            fastq_dir = ospath.join(root, f"barcode{sample_nb}")
+        elif f"barcode0{sample_nb}" in dirs:
+            fastq_dir = ospath.join(root, f"barcode0{sample_nb}")
+    concatenate_fastq(fastq_dir, dst)
+
+#----------------------------------
+#GET AND VISUALIZE QUALITY SCORES
+#----------------------------------
+
+def getContentFile(pathFastqFile):
+    """
+    Open file and get all the reads.
+
+    Args:
+        pathFastqFile(str): fastq file to read
+
+    Returns:
+        allReadsAsSring(str): all fastq faile reads
+    """
+    # open file and get all the reads
+    allReadsAsString = []
+
+    with open(pathFastqFile) as fastq:
+        allReadsAsString = fastq.readlines()
+
+    return allReadsAsString
+
+
+def extractReadQuality(fileContent):
+    """
+    Extract the quality of the file reads. Note that the reads are concatenated in one list.
+
+    Args:
+        fileContent(str): fastq file content as a string
+
+    Returns:
+        qualityOfReads(list of str): quality scores as a string list
+    """
+    # the fileContent must contain multiple reads, where each read is 4 lines:
+    if not (len(fileContent) % 4 == 0):
+        raise ValueError("fastq file must have reads composed of 4 lines")
+
+    qualityOfReads = []
+
+    # the quality of each read is always the 4th element
+    for i in range(3, len(fileContent), 4):
+
+        line = fileContent[i]
+
+        # remove the newline character at the end
+        line = line[:-1]
+
+        qualityOfReads.append(line)
+
+    return qualityOfReads
+
+
+def convertQuality(allReadsQuality):
+    """
+    Converts the qualities from fastq format to int.
+
+    Args:
+        allReadsQuality(list of str): quality scores in the original format
+
+    Returns:
+        qualityOfReads(list of int): quality scores converted in int
+    """
+    # note that here we can't distinguish the reads from each other anymore
+    # they are all in one list
+    convertedQualities = []
+
+    for readQuality in allReadsQuality:
+
+        for rawQuality in readQuality:
+
+            # transform the character into the int quality
+            score = ord(rawQuality) - 33
+
+            convertedQualities.append(score)
+
+    return convertedQualities
+
+
+def visualiseQualities(pathFastqFile, readQualityConverted):
+    """
+    Creates a graph representing the frequency of each score value.
+
+    Args:
+        pathFastqFile(str): path of the fastq file in directory
+        readQualityConverted(list of int): quality scores converted in int
+
+    Returns:
+        None
+    """
+    plt.figure(figsize=(10, 6))
+
+    plt.hist(readQualityConverted, bins=range(0, 95))
+
+    plt.ylabel("Frequency")
+
+    plt.xlabel("Base quality score 0-93 (higher is better)")
+    plt.xlim(0, 95)
+
+    nameFile = os.path.basename(pathFastqFile)
+    plt.title(nameFile)
+
+    nameOutput = nameFile + ".png"
+    plt.savefig(nameOutput)  # saves in current directory
+
+
+#--------------------------------
+#CREATE AND DAMAGE SEQUENCES
+#--------------------------------
+    
+def create_random_sequence(length=500, seed=None):
+    """
+    Generate a random sequence.
+    Args:
+    length(int): length of the sequence generated
+    seed(int): random seed 
+    Returns:
+        random sequence(str)
+    """
+    if seed is not None: random.seed(seed)
+    return "".join(["ATGC"[random.randint(0,3)] for i in range(length)])
+
+def damage_sequence(sequence, mutation_rate=0.05, deletion_rate=0.05, insertion_rate=0.05):
+    """
+    Damage a sequence randomly with mutation (substitution), deletion and insertion.
+    Warning: to avoid infinite loop, don't set the insertion_rate to 1.0 (or near).
+
+    Args:
+        sequence(str): original sequence to damage
+        mutation_rate(float): mutation rate (between 0.0 and 1.0)
+        deletion_rate(float): deletion_rate (between 0.0 and 1.0)
+        insertion_rate(float): insertion_rate (between 0.0 and 1.0)
+
+    Returns:
+        sequence(str): damaged sequence
+    """
+    if mutation_rate < 0 or mutation_rate > 1:
+        raise Exception("[damage_sequence] mutation_rate is incorrect (must be between 0.0 and 1.0)")
+    if deletion_rate < 0 or deletion_rate > 1:
+        raise Exception("[damage_sequence] deletion_rate is incorrect (must be between 0.0 and 1.0)")
+    if insertion_rate < 0 or insertion_rate > 1:
+        raise Exception("[damage_sequence] insertion_rate is incorrect (must be between 0.0 and 1.0)")
+    sequence = "".join(["ATGC".replace(b, '')[random.randint(0, 2)] if random.random() < mutation_rate else b for b in sequence]) # mutation / substitution
+    sequence = "".join(['' if random.random() < deletion_rate else b for b in sequence]) # deletion
+    insertion_extension_rate = insertion_rate  # can be changed if the extension rate is different
+    def get_insert(extension_rate=insertion_extension_rate):
+        insert = "ATGC"[random.randint(0,3)]
+        while random.random() < extension_rate: insert += "ATGC"[random.randint(0,3)] # extension (after insertion)
+        return insert
+    sequence = "".join([sequence[i:i+1] + get_insert() if random.random() < insertion_rate else sequence[i:i+1] for i in range(len(sequence)+1)]) # insertion
+    return sequence
+
diff --git a/rbcl_Qiagen_tomato_5000.fastq.png b/rbcl_Qiagen_tomato_5000.fastq.png
new file mode 100644
index 0000000000000000000000000000000000000000..7c45ef9939fdde1b552dffbbf44eca5ef2c718fc
GIT binary patch
literal 24917
zcmeIa2Ut~Uwk^8VQp-r21p}61L_k0!gO+93Dxl<yB?1DHK?F&bRZ^uW3hGu6Fp-mz
zGnhaHB(p`LlC$ItcTBA6+kHB`KIip4-@ErdzpgGyw`;HUum7KO%rVBCza5u9v}iup
zd<KKDh<W%2MFwM53xhFJ{o6VCou2i1oAEDk+XL#hN*0E;_CH!1Fywx;wLEKKd)D~W
zhI0niHpUj_!u+EALOVB{v9-0dkq{6t`|}6*Ev$_Mw(B&y;v(}b4{O*k7#u&+KQlsP
zgN+%ClP{P*>{C8}zq8swUDa-Ks%O<tRU1F^{p9xKvzL#ShNk&Fi5;r^p~8<3+-$mI
zyn{d4Ly12y$=BPs@s?)L2jx9?jD36zOm_S>R?Jq`-n(<2_P$SjV=uXlQf6B<%e3@a
z=?vNBPl$Jq-5+um6H6by|N7->{cmtD3`SL$Tk4wWpBUfRPXGJtMYqNDgWIA%70#d^
zp3aylLH}m#*}Hcx{X1*Uw^$nd`oNg6Zu*Bc8y?J}A5PBt<05YV|3&^A)}*Z}Irhr(
z?bf;F>?A9F5%a3y6j>vWqVbWw0aoVtfyv3q9m*j$@XrGo=h2pyqQ@0&lJ;E@F$TFc
zPA~V^40dQ-TPv<u>pbL_s+GJ&*f6i~zPOEf`<{c>*KX(F@H`v+{V($y)w!1~d+Z}(
z8g~5Q_wyGmQc+gEB_AN;7unU{T5{#e6~!}WLgug7@dTgPv1QAa9J|T!z5<i-h<muP
zN2gw233VPD935cmMTbs#Z&he(4GvH}e*A`%lvHVgS*7V@7kgvoI$?&}eTU>!Y&V|m
z!oo^RmMqasvV7?8(8QU4Pxz30uu^4$S;?#4mc^fZKGS((fbH0D^H4-V+GtyN8#_8+
zO=^bpdw=Qh6ORuvix*8i+!URES8)Huix;sxeP*@gc-#hCiO8YZ^G^qu4iDFR4pi9`
zglZ+4`$*V@4R%%rf4Beg8FrM8W`<MpQM0kZ&UoXJE!WnF%HQ6hQcP=^CtRSOqkl*G
z!eEG1eRh~eY>>=I!!kcvSKa9JK_9L?XG4;b#BOgpv5}pj=Q?4>rx|yc+v&?iTKCVd
zuhusBTi&j{&dJ#O9_PlAC3#^mB30`K&oypY-36=oOOveXcb|UqC(|@syY0iX$8Dvd
zk8xL~LtWLqeSO|MDj~<8>vX=5GbsxT%*x8*<u7>irjR!}?a`Mnmiay+N4-j)Fqe&t
zjI>r51(X#B9vyhKOyRIs#^k7Jr24ZL^BXnd-50MbroV7~o#eZc5+$$xkEYRYKUubf
zM`tJ&JvtDnyF)c1gz3G-czn1gc`#i+XEDBWUxCb6hg|n8MwZ`IrK%Zq{9blGUkYTW
zA~GjBEqaHB?B^w8FDaJ<$u+jOhcDfHq@|Ii_kLxau(0qzxt2w!VZKjqPtS>Gr(P>3
zn(OfCrVU>nJEEY##%ojwJhEcZdMUBW-nzPz2M!%-$IkSTzK|f|U$VW+B6A{P{e=#}
z*|TTAWwX^pOiBxXp0~{Q(~H?G34M?3>+8awKHarqhw|ZgQ+D~46};9`(y}5VDwpOg
zQcSjPZ0PKaywH_==){Q=5;jd9NB!L0-F00?44nIm4uvTOUYWm&KVt6s^LzE22g*dc
zuU?wVp&9$^@xf~`@{N;y9sM8UP0Qkpm9UzjmY-gP>1Afz*{Sh4#$Vi~>CMZR`@>Zu
z74ceD{OL~*9v!&aHqsoxz9(#S>+ao<#p@(O)SsQ&p%L?B&LXZ`X581fM2^_Cm7eJN
z{8}aM%n9lK!u^g@6GH|b&rIIk;S2YZv^T-aQ;l|=y!R?6r#-pZUkS%z?PB*Z<<LiO
zy|?lT3-|d9y&d@Q^v6p}moDvpu|OcnV``$?Rqf`+194}HHkzKrtJ|UY<k!i`q0H3M
zj4v-1NSy1q>oVH9m0A4y>Z*Y<HNDno*NN~I?>BGXx$~Hu&u%M?=ElYArM7W!a5#^3
zCVou6kQnb7BYz<Cj`L0?JzIVDH9Xq}Zf@?|+m#NwPPFUU4R%zRN*H1l9^rt$`(zoS
zoowy1+t_QMdj=y*=I*|^ZmGv{WCr?66vB-P{Atlw@@YQ7#%_7LHK6pWKt{;J{q82!
zDRz$a*F<ew3cqt6X=IffQ5<sUYYH`GM{!9z#-*jF5A+qtw%0wsBwD3#_H2Zgw|Cj&
zL!P{IE|*7Yh3~yMbF|6VqBPN>mN&|DYI5SrufINh(T_h8G0a=tSrL1P4xEap)37D>
zQ}`jz^(M6$>5I8#BFw8(T3`IK0ITYyVvm4_{dD-y^|eK^lRX^Ou2T~qlWl@^FLa8S
z)n@3_x{O60EjW5-XHk%x?{3HbGb^?${;+JzvE!-dI^*oSb#DvkO@@s<I{ns5GvVyL
z^{(SqcAXzp@MzvVyl#;iu?O>gMcETWwb4Vhu6m2z_amSlZ!eFesAbpjp~%1)A63F)
ztw2EK(@JRJo*H)N*G&sazc3VrDA`u@*ga!nP>TY?!-o&KcAaz^sK5?wd%a3e`JRvg
zkIwu|jiFiRJLB2-G1+yjvKDboL_d2W)9dA@8$5nuR^Jb_v(+3OQ*;rAZu4oWVw>N_
zeta@9Hl#Jc#y;mda^#5O$&(K><IX%3tvUa)O(A8s5+4&coSEA^f91}Qv98ntO@9?F
z98m{{<jLma<wo`$A0Cblw9~Rn*mr3s*cp~ZsXzO^xgemR$8p$e^HGzD(Sf$IaF(K?
zVt-E#_ffAf)yN&K9|wD$Uz%GMV{laD?1$s@esEfY@J>xCVhj)oGTQ`Wts9R>50ol}
zsYUyrI(6#S&6^MBE!)yE+*8j^shq)Zc+FsRacw&2vCAvZG&;RWs3uft>!!EIeD~xV
zIOW|HJm{s7dM;8h`)+sURLx<bm+l+|c9J-FLii`8DFavZ;OToW+1;SIYVF#yVJhKG
z#rEpzA6B#r3kl`@vS3x7JGaXjDQW3^oVRO&+4rR!ipAD0E_tSZh=u26uVM11A3Ai%
zcjoN*cjbKdB$wiB1t0Vp8Eub_6l87@6Z`NaC`eQPx8HsX!v5SMAtAx;9yHo2e;>E|
z>yk||eBoM&QS;`{7Ydg5i^;}|K7;2pKXQAAfsn8;zAw*Ty7I7)^Ju@-h!YarH6CGu
zcgtBlIA=cY>m6IJNE>wK7Ck=X>*mHVmCRnP6sZ}%*0pB8`(hEVJO$Z_2BFE~qXL{b
z%(3>}HN58C*4!?`4>^SmJ?IYe@wj<NEyoam<SZ?pid#4Qs=~6%+rq<hTI<mH^GTsN
z5d#@W&_M^TuB>Q7ZZPDz_e?*BOW$JzFYy{r?8)aposHMexsI>H?^}0fBEj<Kn9E1S
zdB$FkPfAMSb=i}sA0+Q5v1Rw}()*J3@41f3$%!ELh>izuV0>5*oHQ#-rv)*{^m;u@
zkK5PIJEAOH?Worsew`i6w(8U&vyQHJ9)fDpo9%P&@R^hZU7I;`W<zUh=ufj3P;gZ{
zc<?eZ;nvpTod%kkJ`%Pqb-fiScI}3e29uP#u<2A{4dryw&KEhxpDB9LsNA?ygDq(^
zZ}DQn7>LQX^?Kt|l88wZ7Lm7HZIatFf`fxMZrWtm@McM8bt=1280%{7pob05RK#t>
zUel|o$KsVnXb6k@$(!nB41P2n#i_o1>y~Yqi&~^+ApSI^>Vrax?aIr72N5?@Mp=_n
zxa5GR>-c^Jsh*z^ARiH^Xe|zuu=@P-mScBsL{1*bdD~c@yiP$>=|W_b0XI8BL#d{w
zh7Ntxd%p|$_Saeb>x6yxm?<Jc=ok)9jE|S%S=Y32GAzdc#q0Q$V@|(4tFNo8JOA5n
zEjbs=n|G6EVsfn8i{CG%qqWueTI~*v;Gy+mmUUM|jNacbLLv<U9MF5VMMg$<(V9J_
z*h?a2A9qiDdCBoH&e*%gWz39OoNUv~GtoWv&D^<0S~-pSIiB2X;4<Z>C-<>f0RaIX
zf}@>@wc2e0weH1%N*S>U&ccYy1|DyFnpbgg?Pzta<;!ifUobQ{nDKddq^~)0XpN}3
zGPW0wae0KsC^iy-sYTo}+FntuW8=78c3JxX;Wy<_#m#0Po<8Tw{Wv8aIo9yjo4;0a
z!-l#mcEwYJMP=o!pFTar?$oiLVyAfW0!ra|RQK)MhkcVs$O@SA3?4(R`k=LS3=Wjd
zxL<l%W)8#6H%A08Yt=jBgl?cOdUHB64NskV^ki!GEx?p*n>Ovokrkg4y+$Qmm2ZWB
zuJ+Yc0;;yQw$~?sQq=F=yEmGvH);KPz^Ui;D#~XvQ{y(j{PK%Ny_J;}znw<y)?ic1
zWy_Z77#7W%HLJ0{zTSFbG@$X^p&qUL+FHHY^Oqn0{`>C}405ioOAVZO_3Bj!4icbh
zw6>vc=J4mMrn-mr?#(jmE?ZsS8xqeYVsh9k!J^h&r<qSjB4endLccmvI{e)8pJu0=
z?|GDK*HW2i!QxSg*3~HvbnB?E*S|JZfkR=}oj#=7<Is_u?C)Il^vC@aV>vlFKc+dv
z);PBF9XN1cyO@|d!bJay#82Jb0tgfB_9F$>T(cNStAtU%9W7rJ5tA1nt5-bbAAWz}
zd>D{SO>w8=g$wPs)pU!xs#8U*KEFDf>+R*W`|O8ZD|l6RvW?7)W{*Yq-eR(5YM0k1
z#FU0A9YUT%vbW`|#Z4rXq-n*Q1R~q;ZrHFF0PX!?%J{8et?$C8a*aZrot;&J<<~h+
zjx_V<;*E(|)JOqhziXUhFZvSb3Ol=8KRaf0q~)=vb%$s$=K#{M34r-1Vu4P!?D&`c
z4(HDkP!%n!zr1Kof384g2=aTmx}Ss{4hIv(LQdddiGpldYe{ffhVuo}2%Xe(63(M>
zfU?3Yr26I^yLT%Cmff^%?eGG6Xb0wChbjd}qzn(AkJL^7XyC`LcO0}@si2@x7OIpZ
z!OLS>+jK5Zs`t&NQPed$?{A3NwcX0i=bphB%vvbH*jpTUL&~A|w4aQ#CWq|!Q@d3|
zTm8>}?<q4H^5kgj`I|RyPQ#w@VXJtPvLje4t%S2DKE3#*ELb66P0b|W&Wcs5rV9`N
z1w#M#<%4nlf__`JwGC;IwvDx)omov<G9e@sB4@tTJ(qBuOf!6cKeVCYG%E#r+%(H=
z#+B>W?__3X0>B1uI&}R^X~+q}qPZStQLfR`OAmg~Z>!DB?8=;SF)%dTftZlX5@2Ut
zoE5pjK74p&gfJrEOTxc&C{RD`&`OAS^!RaEnuCpqaq$m;*j|Z=iH3Q1AFh);cLqB&
zvE!KrfK2JF?Mlm+EP1$I+OY*lg8hW)tvxw>GZ1j&%5T3F0e-1uI5`kz2l_e=7%}k4
zojc4R_YNVy7cXAKoqoHNN;ZM?{$jfc!A}?Gt}nDHkPhCV5*C7@^J9|L<1O;{LvGyo
z6YEf)gQKI#i=XFR@$i@imqz{$2Q$Zeug>9EAJLTW+m48nXcBP#^E~V>ZCubxQAvr;
ziV1F)P#Zqey!8H|Yg~ay{^|(*#f1;|qh@tTN0^_#aA7G{M8|QUw^tEpDbBj_omPxz
ztwZxVeDVk$m2j9XKHctd9R&{a-Nao7)T09^D#L+hxx_3V2nh-8b{RiEGCHam+&7DH
z_JyB)$Sg)fWJGFGfSA<_gEXaJ`D`|ueYP?of46znhvDJjyLgAbEM3R9_hp<v2JYFk
z>E$s$i8AGA-RkofF02VIF$TCY${6oG8s6mC{n>xBuyBbJ>X{(q<Lp*jF6Ke6Q0?TH
ziu4Qi38VMBK2M?$^Ffi5wS0&28<e`zO*qhkM_K$x<hYVf!a(*Ki@S<}M>s7kENoDe
z_(o@pzLWZrH7f|mg}+@22WF&McFK5%TGUzRW<;pFlM`dUuU`ESF7MS^oob&T^YX=u
zV?Z@grg}~&B$}Sj<=(y6{pDfnk#QV?lasxy%+-2&dI*jBZRL^eF;mw@YIUjvGRMZt
zu3wZLtGmQ;tnuCLYcD_TYRqMc78MmWAxevS<z1XLHzEay*9WC-eRH!BPxAONWbxA*
zH*P$(cdy%a^`{T$N$-hSJ$n1*O;XfE56TYT3*G7L;n7hapavh*h9$8^1-v!V&XeOK
zO~^N*NU{JuulhSH*^TdqGyqo&Mh81h5u+pOHw34~uhGxLrjSD+D~E$AhpI{rXe@tf
zqISw4yu?6Z=B}o9_ExBo(p)ASOQLinm#tiBh(c}Nx8Lp}u!-z(PD9N+Cl7Bia^ghL
zF-~FgDzSET@MG@og4s#+9N@;ZuOd(xcU2{icElExBCb`YM9!MKxFut(s{~lFAIPCT
z!F+*b;>}HmOmKKV%g&yWB_sBF9pj{Fcg9%Mb#B=xtV9b!o^oGPzQtR|MDyyf*$Y;N
zxJ-=h(2NTi?5Yk2b`CjH^r-D)d^=wSc+0E-;-JjKJv=<LjaoptXh#jC-wzIMHcP&D
zJ32agk3~%cl`5{|J?@A*T%zU?g8JDF!^81fjd+-3eu3i;+;T;}*r)hP%P<&8-j-J+
z8FdLJRY}i~+-+Vr{`ljM66e31tPyEAIG1C+GU{*ZNKV01KZ8HIC%UMDkpsSPYCl-R
zD%A@^U3Lz;%Nnt8108LWIw(>Q=kA!wfj;_v|Ng#^O+)p=3`U*nAKYSAwav}S6;9Zm
zC?4bSkwyRlfsEl72FZd45#p5b7oD7(K%eM$Rwb7NZsFsT1C~C4;>O9@xhzO-%{p=G
zSAko^#WfIxttI^MKMES$uH&B+mvM7PBmC!rU?85TGBLWiq(sLAc{mC|>upZXp`j`p
z&x4-p^Q+Z}1g%?a$*$$;%DX4*Sa)$Q730&eHlY#KVIiX5Zlm{WRl<~+7HK_~eMBvy
zP|>|BC{XZPzG6k7K<2ojpr9Z?TOndgS(pm%>+Ebz(S+ZA{k0HpJML_S5L3}}z4UTU
zPUGrff+rMbep|XZ?p&uP0Xz^w-X4OE?U9K*%+SzK6C?uym?%k%YcrgQ1H=Q1Sk`5k
zj%EP85DPFqKHl)@lU2zW2X21v<Y|v0B)R2WT#jnh4@%3czI51CdYwj{emcD%FK_pE
z-+gCTm1I@^0RZk=N$c+Jw7%=8s$?gJUoPLNUI+-IE`pL;1kh#|5VCOg>R3}Y|E9NR
z0pG$jGF{Sdo(4^z?;+UK9-X;|K7~)mo2=ZaeiE;n$z%#<uNFp`T5l_nPZ-gsrRB^r
zE-n*bfaq4q^)gN$4r9Bm63|UMk7@-!Y<jy^#%Wk{(=zkeRF^r@qH`CoJq`r-;^oUo
zLHCyr@rsOzPyjw#v1Uy;;73yF&zCQUIS#aGCYt+EZHw|Px79Wbhh0?;A8oXih&Qj6
zP>IskXlF;Op>WVKUs(F+z=FWQ!0mhXsDd0RLdjPaa$=MC+QlrNA75V9>3N@JY;1hw
z=+W33-zis#@lP+^!Q>Qa#XcsgyFn7KVuwyjoSosWDo;26s|b(GcY{(@ptMkTyb0?W
z97IBPP4I~**t%uQj&0kvRgPZz=_hRy{@UwE)JW$##02@8NowR;&u>8xM4(kp=mD{`
zbeed#Yz@C=`S7_&(zu&Qw)Vh=jI=Gmqgp4tMK-wvx&ZzJ-MY2B-ER2HC+#uPC>Tj|
zXGpKYtMr-__4ePqefwLV<i+#moxni@Vz-GDM^Z`{pcE9?)Z59kb!$8Fa&kG(=FQA^
zGPwb-KUu2BHs<*dkMHKZq|oL>l(zUUKmUB@^_3On>bM9_{9`Q9Q7_^l5o%LPd3bov
z0OYF5r8^GX^748F%2ZPXfYTd1fVRmwum(T>{7tywNN=OH*jm&~cU1Zi=2N8{&G1`b
zlJn2CNz#pPa7}KXH44Z^PzX#alV!ee6@COH1K!)VW5*E`p=DT?I53lTT~!(=-Bo}1
z;b)$=c*DdWA?vC}X@?S}LOdFRG8J!3MS#nG%FKv8A3>w-Is4%OU~M>He;Bd|&vpbv
z{H${N^pk|M6_0N7s+kl%*w<E*o+{G5Zpo7Gu+!fANt)sjv7>Gh^EhMH9Lwr&>7E;G
z>?h<tQvlr}=cC>|cj4*{Ogg7cdG{WJ^=Pk3wjo9YD^HM@PcNgYQQjSy|2B)oLLK|S
zruluSaY>Mmu+jB_g|^I5+-6y#g<iXPx7o+IA~5TbjsE`rnlYz;!=?@dh4Th63A{K_
zZz?!P@uyzTALy#q0>&oTH)rAMlLHIs#@=K~9V^$#wp2STCwIG~Rr$z~`RvE^Qtbs}
zE>2VppeCn}ot*pMq}0?kcFl2HbK+c!mo|%gEnCP_g5s&0)+Ns0<)*(?ilX+p^XYVQ
zK_tVbF4=YI|8sKGc}Nv?=e9}OGNW=iuAAOF<VUxf760R${~hHp0eewK(*BBvN3C^S
zEVXl`+NPC3{{OCrp~Ym_N5$-CRh2elH65-_?~o49KW`9D+YNKsLVdX%{8(i-t>4@h
zXC*`px-XqPJQM#dId8_gtVO96rg^ln#yQx3BSk{J&|VV!l=i)Y`=77+w*uv;igw!h
z6I0{Ovd9zSBg4ZXP9wckq99L2f))d+2`wx<PJaNYhKS=_$KvAR(qIMd*>mQ+1D2q(
zi<GWyf^)xulXH7;3H}%Q=FQ;=lqLKkJv}{yWst2`tX>_G;XJlOT6!YEnN(rM-kQ2C
zR%(I>(&?ZqeI)Ip5HW*5D7XKzO7{j1a;D1!5A(kYH>PDDD$%Gm1pVsu1hv{WAt6OQ
zmytl8?f(pOR6VutGFMBz`0=?FUtfs#8dj~vOnbhGX7>PE&<#rpqlx}?Yzqx8jcE*=
zdLw|M5t;u1ZvAM=?hBKk?M;xRh>zzX06c&3;&KwzK3^5sv3c_WP`4Q}qb-|6Ov{gX
z?IuQmUq_ROc*^3aHB5206uc2!H!(qt-a%LmCc+%083}ghIzI-2_~k`i@3k9+IOPZZ
zY>yLw@qLa%w6YBICGu98R}OJIEAy&Fm15a>m=OQUP$~~Jcq<(7TD(^50kZcm?(SLv
zR$^-bzi$E?Ze?D%cC8GlM%x77QzhNeF5D0g3KXd-#(Dx~?p>>GN+}g-^P9c?CzRNe
zGg(qxe9OnDtrxo!H2{Zji<%(xy}&0=2<>}=eWVf*vW0YCp0L*w!VQWbXmgu?x^&47
z`bc<oWu+FR+X|BynE*&j4<0-?TgfqhzIAA_lO%q>cW^Mlgn!k#b&;t5bh_Eo$-hC+
zurT@*>KC9-|E7X~=m6JA4J4T|)Z)8A+Os4;0I`O$@1E6Xr4o=>6FL$Ll(T*F=GTtv
zWSqp<{&csDuICF!RnmIj@p4JnhWNXT8Z#A<PD!>QT?GkjK4?c<<8!GHLCTxGyu2bR
zx=@z~1_$$si&q$|l-|Cb3G9_<f`Y~0Yz$<$rA7gf)iF{4;;Z<z-|UI?2iAD=$+D`2
zeMsg`tp$Tj1yyEu_hnu|<rcPg2D=&@u(7mZB%~&MT@hr1h{o2|R?S$$Yb!EL|Eut$
zoNuy;9?>oB6`nE9yjqj?3F>*}NKFy5ikJgRLB~$8_;q-6BpRi@f({jcAC+gP4qRI&
zsrq+72iR)mwDVSrxONk<@wBT=7bLRu<ZwM<h#Kc19VTmiY&ifw;ZCpR>(|qn-horM
zY^!`JGzXD0g&XX;Yy2dqbMju9Lwhr?B-S_{MLAAb7CVOLm&=z){wan0VOX|sy_9<M
zdq1UU-Sk`c??*`3wVeTyDi<!?Y>ea43(f*cuWeVAIA~sFo2I;H{ulX~sCS-Gy<6Rx
zj*7uY#yKUv3L-F6t`oRU>4SZ<5YV=5-1xos7P)tqq-UglT7C|zRQLY#j~;?u<})3(
zLS2OfzkU1m1b@5CH7}YG2rSzA>|e^HtS~<(izrYP+Iw_Qk}op#hvhTY;x8wqT6&Ao
zS~Jf6IlZHHbd&s8#=oMD%MO3=VfKu5mdm09>E(Yhy8rp#ndB*@RQbJItn;~RCEY}p
zDs;-}AO1bgm9(mAJfqToNSXGMhW}?7W~MXq(^zEYhLrKTq&p6!^RS3rwt0V(VgELl
z{$Jx5n&4V1?WkNJJ#?R<L6YT<Wzcn=J$qJe{dWO_#J>YD*fWxO3499Rb<}Ah18+oZ
zMHD&?jx*l2Rk8D9{84yqRN=jd@Bt}w=H0FBbny3>eSCsoP0T1sk|vKHJt8N;>sPPN
z;I9x*t_k+u!((Ci7Q_Rn9YmPk;nNC2QFh?qLCsv~rlrtx{lsk!qV7^TcFcpQ<wa}7
zb}0<brUO<t*t08pwY}(%Mvy7XJY^?Cph<q0)0n*)founY>i)|Mkv<ZnpiR{0c(TbJ
zfHD@|neVxs`)`7}*v2IO;s}dBGCFfo6IPl+lx!;TCMwX1_7QW>LqDK^FY%YY0LICH
zPbUQdJiZv^`3fjfOfb$T5G0}2JweH+J1_>37O5IoqtU0$VwK@at3)xesK8V$-0qv3
zIx#aQ-8uWi)zi*DgVYe22?~7v{Q1R~IAlUJVhuHNIsPX&&<A~wWLS*c&%?wkPO<#-
z(^2BDNY*Wj(96`74=w>1AioQgLzWMqiq<D>FG4bLal2VcT~j+jQh_(%r#HuNoA^^<
z^k=r7YZ-VYN;A%V`J!zVvB3z};own^d%;<<jS0YXZbTahA%3Tz;0dsqFy9o!Zy`cq
z@UIDlNS|C}X)Sh~q@O%s)TpC9L1M-R4t&o>_^LRh`)^PgJD1z(IGD6YDfZn_P<>KL
zQD!HliDsW(lUmg%B3@Q|I6nY_;W32#^7b#EUM3Gtmjsp^Ge=c!?bJ|2(QL;V4mZu`
z`ufdGq8x6S&5<O=q4RGThtB`NI8+#x$3kRZ!KYc;IK5?_a^tGI&lK(@hL96tR{#_N
z9wzu<rNf7RRgF;pkq=@F8_*9B*qNz_cRj66`hZ;M{P65$SSZW`U>xGC>JNrD1qZk&
z9yxO5Xg>7pTQ_c;EZeD<p^f(j^(EzXE#)T04Hn|JTlGM9tsamH`@=K+vWNTUnpGv8
zfK1?DbD>+B$&wfMymF-&e?+fEB|`lK7zh=hs}-OHn1FzwSi|QmUK;{wp0pA&AN|2P
zunZfKa;=cpb%?m-2_-X1S=eom={uS8IXKk70HQVsC4~o~C)e&XH}D?b=S}C!2AgmC
z!_N<Ya)+F~kzN7p5~`q8`+=g{;O~kzA)SNB1GQ+~Bl7a9P`=`vMyz32(h&Lk^aA`1
zkv=$y>TsCEcMaBn7?lf<sn+a*&c6BRoe*3Iw8uqIsZ<|8Tx)#(6$)0#OU<3&*Jl~6
z>s@PG#dm%&V)|3w)YG&6UXAq+Gr$dtS)-u!tE#8bFeC9;#-f&?nhVjyU_vozMJdo_
zKAoK1e7*2lAnyU_115D@7YX}OfB>(QlFCP~j$wWne;pnIHaJ(?{+1K@{?b|ya~+|E
zr~t*u-4i-MkkkI^^ry9Boz@h_o_BBvg|0CQtWUyBXKkh)y{}Nkz<wxtyx)EIyqa(N
z<}6i4WFA=5q-lW9Qhe*RSp;?r!ggar-E06i`aUwNbeS)qtGk8wTHDVOHKEuF?;7#%
zTLYXF$%EnV)XQA4&9c>EF?MlDYIjCBxoYzLzJ80{$esrU_r;ZTQf!MB2&6r@apT5X
zQ(WaSsCZdV$!U6~ZnUw|Z4zX^c05%Bc+2lDnNL4clK99VSARvUQM>Hah(baSm8(=O
zu929&dR;dDq73v0RHTM$P&Lpdt}dLuV}>Yy?VNrDW@sqZ71_{mmUDA=U%vkJx>mTY
zxwB^*L;I-6CUF6KThw;?;AdUSO`I`7&Q0*m@s0x~2B%LGPfeCFl%@R;VNz42r!NzA
zG$Z}$*cyv8n7$Sh!`0iH(l3GndH`!}vuGL?Y^Z&kIHMYmMeav1#!-=4iSIyfkvOQX
zem7O?>k_2kN&EvLQ;~NowQ~CMbwLgIP1uBB$UB3@B{!3yVNh-_LQ-<5@Yh?=!yZAU
zRst_8Wh;`-*RNmSWlrdY;kM?E#Vkk4T(EFTk2H99nNNoyH(&gI3gpc$=<4=~2%MI=
zx7MXJgo(_}iO;(v58!jdBH)sYW!+xDXgMUg#OVdklFLmLZb3FIL$s&li#w5nE@P_!
znL8jA_cR<3o_|?z8Ok)bomk^lysF2LZEfto+ODgX(sqCosQT*;x6Ey*G%eiR3LboP
zu+n0nE0y?vC9fxlK&{{f&AX;)KDTyn3!_4gbrMf<xLvhDEP$t0f-uzB+#F06DjX}v
zW<UF?>W$ND!roc?{=rZ?TLFBbD*O^S?GZgFosF@E<{iIZk%cQlZ4XEIqBO%_Px6jq
ze~53&?^~;(lCwWA?-<_x={2(O6G3ID`pc~a8Ld5Ev^K8h!uj(pxK}PnL39>|M5b%h
ztVM4UgOe2eBve4RpFkKS+4aB=KeVv)Tu8P7Yf#rUIDNlK{8$xtKn8w(`2{O?7LxB2
z-%NHp*is{M<?5$D_J*U^Y%hxkenq?1;sZEPR`$Hpe^7M`Cm)7^m)b1ZsqwzA$d$nm
z6~F<2V3wSgiQEm(FfSC)75v(FzlN_jYQ3F0Ro>p-I=YbO%WKjv43HJ!Ybmog!y6k%
zo~*K9e++zezE%`nT_JA#OmMXXTmIOkDfAAZXLR(|tbU9<g@CJrNMfm^_Vw=W;_h(H
z+IwqYd)R#BR-xWx?<zTesS0Vi>5Of83C{&(#@2qp*S{cSUj<VO{c1Qga&C?@1b?=+
zq`|vel^>>e_17mO0=zlEwHy+P3Xl=A7+eS0so*Ai6V?Y!RqZe0A}A0(Sy|b9KS?!^
z3SLiOCP;^e)a1j{hsbO#O^c`RB<UQ58`P@25)vPQZ5q0}qv0F8l=_uM?)AfyC_!O|
zbNFmMlpfTixBfB|)j%($%Z?&OkmCxP+z$aRqoFD1I>~zdmtkYhgr9v)(Sv=L`k`5q
zXVZ&EKtO<AIm%^Q8~f?U*ABoJB5p*$=XU1vtKU9mx@LL^rXZuhh1}j%KK-F2^O3>j
zLp$fqn|I{!;p2fvZa1{I^Xhw~YJJ_u6j&guWria>QEZ_tx|xUPuor+`_NK3w7?GzS
zGB%_JNWB7LA#O*j9<B!jl-#NhoYP;!7O2&|l!5F^xD8%~Te;`K7Z8ObYBwEIvaY>F
zY=9Tb>YjK(Z>2>hOxH;dH%<R#b8UQqr}W@m8Q5Cd(_vFakc8vr2RJQWUGw|84JLX!
zw|C*`m0`?{cjz+&&0-4w2T_mm{!*bhs72MCUq7nT+quvYXDZ&Ta*s6JDx?y=zb`)R
zB!^}P^Zrk%mA?nphFs2mtl>839|Tb#r!{t+`?_+*Bi3`$;aB?jOy3=XaaA=n#4s3%
zKV;nB$9jUzrAvfe_O&(5U8;<_%eVh3LNe;iacn_wwbxEfCIY%W1jh!Am2EJBd|g%g
z=VhDS51+gW3?OBIX>dk!FukxDniXG$cGI`Hjc=))-4EJl$9LcD3s;N2!vp*5iK$U`
z=JK^`JDT3pXBbIVBVAjpvRAW);A|)ZjIAnNH2nvO2Qz&4WmYv_T-RmKCz?L}&XeBR
z3%4K%R743j`%DW9dtF%FhK1JzNj`;0$wNQ{*xM)}HOUo`yXy=or^psw-7_6gENR!s
zLJ>Cq%8k2eZN>fJ!>Os$@0nqq>}3Bnw=wKD{wv)wdclxDzlvM$xP-IMG`zV9LF~4t
zrzeT<Fo&l&@&CWpvn|-{>Dy~qGIP}K13CVHZOQY@U;hV>>^y1<&YB=R?H95}l>Knv
zfHBBZGVb8**^TxalXD5yGF3TfZl6DYCczq!<{0Wj=;?8Hj82~p!K);vTsgAG-Mg!u
z2P=#q%GGsF^-eEh<rO!VYv>t?GzxH5jWx{of?_3N?os4xkr9frEgZ-+OgB9RHpp-=
zfF>|*cs)T?m_9W*<|E}01Lz+_B@5D93AV`q42eg)&UM#B;#*s3rx7I#zHHiY2*VUv
zwVB1ejk!p6{Gg!~IAn$&&_xi&Od(?)k(0|G`wGUA48Cn)hrm7yPjVnW(j??kmo;NE
z^jhqFE}MUW3=UHYzOJJ57Zo3v=+ZE(;4i53m#<mlt6xhhvYecq!}LMz$jPl<)G!x@
z0CKg#92s;rcj>Zao0-p_Kj*K<|3MwRp;F)3*(ucLui`y00)7~+5C&SVaI_xHZ>;PY
z8e##asMsW93!&&})6CdHtDJR>7*tT<;d+@a!~(-0Y5V$LrG58-di?M~ee>qcp{kLZ
zpc1_xRFdRNO#t}M7{fS$famf@j>O!7D7;NtTIcQCx5?#eWnH8y2Nmv%eNwa;IS19l
z4n@*g;dTuisVgh_{>*{TnAqX7$1JR~GLe^`|Gi^bl#UkVaqK2#IK%#snT=q=h&RmV
zDvQ?B^HL^rCfo;sTMz^!&viUCq1TLUfl93?Yy@Z&n)7GHSZKls-s&Q7v$0-&I*#(t
zyxoNsgi(dU%w>g(iwkY8+`BNmfTD@$2?^Ood@c-)$_UpY=Q`AZ0MwzadhtN4iPA}(
zu06i}_Bgx<#L&VN@G_-U`!B>~$DwNb%}i;>{^LNv#6d6>Q9#4=9tNpL?!MTGtqT4i
zRxg#pk!6@ouYE(nrU$h%;cP002eC9<O<?oZt>HLsq+kF=Mfkv;@h&&_D4YX@^a*(I
zsPhT1LNy-%F9L~N)Bb`59MVAw0p@%|{r#oz-P46|hkIpYWY7;$2&%ugD|b5HnJ?hb
z_pq{G_J&*vs8^3LnaO6cT?X#_$$agzyHtT`30w5Oi63{}HnoM3q|&=_P2H&F??LQ;
zQKz39i+T+kqB;sUC-cjSMH#HO!`-@7{`OBqx&Ld3#D85nCc&1GB#xK))^224m0L!;
zec}IM^z%P%Kb_{^>jroHwE(|C^gl&RXI?@4ltzhwCOc}q`j}uA4qY*&bhZ9&>xRSN
zE2$<0_fQTKh>pp>fwy&<hOm2UzI&ke8o8!4)!u@*OkUN9LLl}PkcWhg-m8oJx8&xN
zHD9Z~EVKG)nP;P{I2sjx`st@Q#9#6X3uSMEq>45=E}_%E!phu;)Wjw0lBVm_a{$6r
z$Pd3Q9Yt|eie?#RG1^0vQ*4c?HPZx{k8(2z6O)&h7g2iTF{U5D(!nJ5_}y(@Q?x}8
zBS!66(9@ZUsK(J)6al#lv|xYOMDSlSwy(<klNV;`f_^&5&F-CgIR1X-@6l@4B^N&s
ze>_aA9*bw4nB`tjL15StO^|(Hrg~!6Ue5G7rLT{qG@IpvuL2cNJ#AQn+YxPXq3~oP
zIP!~tQA&J~%mdQxdn9Y<zWL8;!Po`{1*N1>CSeTj-<!EOd-bB#yY`Z`8xO<JJaX*V
zEuPtP=cbm<nLAgDl6YfhC!ee9l(s3>F~&@I+qO4kwHh89%dM=e3`^FwwT-irONEw?
z1&AIpokrumj2ROWY2&@Q*nhsLfNz-HLoJYdLH}67vs@irZ>5QArQ;P(e|(8-cZxdN
z?wcx1-_p>A2YQdu)_VqNN;BOtp8ATex&Qq0VZ{W_4lJc<f+P>m50Fhk6TXjcf|<M`
z;lk%v^H@H)iQZ5TQ2?Cw)hPIEDW;76MAj<0XqL;jEowh{6oGh(K*_C51P|Cp?K>=9
zHF<zU&1a{65xL{<9|g6ZjHg~tAQ*(AT;LZ$u|+k53dBii?2j|3SXA>N{1T93`Ji&z
z2BIxY1Njaw2XEi5JyVLlazg>hOsVbY>LA&A3eAi-kg<kO{p5qeQozOd@E;~@RWr~P
zFh1^`{mnNaunrQJ3ibXWIkUZ}kCM0%cnH{_;CPwTX+!!3cHmJjSP4l~fU04FDpJCs
zm)bU;Kxz;cUQD+&vKEj99{cS~a=YdZcT~idCEFM=i{aRZ#XuYa#8Dt1+TM2r^!QOo
zhr9e;4fZC=A(cMZKf*0)Gs>%LD&veF!|JA$D=jS@58pdo2-Lt0K!3QLcN-T!sqFt+
z(mDLn{GiqV``#_L=a@m34E>b$%BHZ>-Spp~Gft~}ZWmC&@vs|-<9ZB^i_vBNp9-S?
zPF3{}<L^HnDv$-}%wTjx{xyEEG}8aazAMv0tLmt8(mUv#){P4Oul16V*9}`o3&L=e
zrVV+iTEL%faQ!Fu_YZmV|HQ0?uglWq^dA7h*r|WKEdS46OLTu$=XrU;U(_yyUms@v
z7sfqg*<iCK<k%FWUAWrq9|OH8DD({%v>)U2b$#=X_aGtEy6ZV2ZCF>?lYczm$vM*8
zK7#s}sY?WO8&R+p86!dH%x~#RU~H&8bs6aD!;Bfy0UDSq0NHal+{lESiT?0Mvj{1s
zVAGN}S0;UQ4Vjr$-K06G!}Dquy2%>9+923bfa{Je58Zd$G#xXIK(sZfOo&8xO!VkT
zX{j>wKFW%4-;0-1vtIe8ta{BaIU5)bt(q}2dRGd!j2woE7Rd-^n&Ye<ka1wW`Ev@M
zz<|uhm+u(h<+ZW3SfKv)=^(%K*-%R+BaTf?CE1Nh9*nnaAG9)yYMi=cGd5+(NLsHk
z<-g0wl>3{0r-sB@7bSE_!aAgred*Gr+jsBYtvAA5flCiLTOW9omBA-HlhN^ZBq&uf
zEAmCb(yYjHZKd0ByWqH&6!vZYvn@!2EX6Rms&`Kf*K?rXZM%$&zuxyrDkdgjB*&t%
zv^lb(>Wco7bI+fBEBWbsn&fJITU-lBDJ6BqxGQdlXV<Rds97ouscS4IKg<YrDtLb@
zvi;<@PJMW>5+&igXWweossF5Fx$}j**!pg#V;?IP6znTF1V1!h8y}4<FmOU$M!omH
z6VFxi^Byz{&ziIFIGnm9(xTRHO&_XJa&k&@9PDWQo%IvF=;mKw3!*4<YBCkp{Ycsg
z;Qm21#Y0kzYVCs#h-Tt9J#qFM1gv(x;1^r@<Eph+pDw$8C`+35JN?>|^*hn}Plubh
zth$HohD;KS2^mx|kD{Zgs(NXcC4AhD(C>(cfAC-f*tqG=hfu-4$MJ;fSMlLpQDXyg
zoT-QY5{FidLU8O#ZeETN1K=FPk;(u^;_ui<Z4~%t?!@Z{Hdrhv>|Tnju8fYa%N*QM
z;AleN@N)e8+g8$fy2>r*YQaI}IMfvhWs!W{)L@82XxEg-8E%N4Djy+(E2uA{AaHC4
zcA#!{)TLD)#Jf)W?lCJypG`1m(^`Z3xY5o3*mA&i{ZPX%J@q;5#OFc=h`%soE_Zj=
zK5{*2S)?13S?-+akXIM=<;^C~yHN2{JgV($oef(GADXv)YyTM7Wz*XTjo9qf`p2N{
z09XhDOvIYJ50JG0ZgU`6qijppNw9K4DB^bmKTr4JK=KjJw%obVViBj%D=C0eHSPC3
zzor#K0xAhSx{CD-+>vjy|AqIfUq1UbX_jz{{l)%%Tl8WO*JD84F=)ZFQBq4<gOhyN
z>&hwtex`(Fog09i&ULx3Z3k9oKPR*Rs=ydQ11^$1Y<6xGpsrC+P354X>_OGEq8keS
z^Vo>SwVrQy2`-X39UK?aD*>_^KG}xJ2)AD0CN7~M&gvWjGfWoDKNuhYOIzGVB?^&~
zSqy1B2@x1P8aaCMIZe>jdeaV7RiL@OAI*!VXj`hdV}-kzK-0I6xJ?kO;1AGD#7$ju
zFcR?M#zEWq*DrA}d~@SF5CKKbL)BX$sYAv&gC=^%rh7&_Jr<kl7Jt*9`s*LqQ?Txw
z1Liakn;fZ=x~Ed;x?GjHI|sKa^ga@t(ahI!#cu7@Hw(~l5~h=y0AB$w)A7smx$!p5
z^7)WAfgjUUa(aHPH2VuzB7om^NIEDhXp#)xV1xvAZnx!?F`W&66+0#lEKgcD+TZ#Z
z*xnQviRg8a^-Gs-WUlp1>O>eNe+SUqTaYH`8zpPt<Uq75{xesiMr`HHRY&gZ3`Gr+
z;-M0%_&UXkcWTeZ=)d$eg4NlNz94Fe#1Y(VwR2|I-o2R%&~OLPt4TmNcNh#bc@9MF
zeo4J3co2O&G1LkShUo}oGaC)vuN@LeYkj|(6Bp%dF%PAdRWxv4xq3BLmo`OgflN3}
zhY`^8$EgY0cWF+=dhuk7S{X!G=I7dj4Sv6y9>waWO^?^~m%O=f|Fa8gHO4=nr(7lV
zoEgd!1@b1qZc!F%q+p+TSyH{bCSAmJ(rIESbLuwwv<Ft5huIiHSi5*Bc@=QF|ATSS
zH#NuaRY5^P%Fw=*=*<IUR<FnV<0X*;M~8SYNEiFizFkTO9g2#A*Ki_k8C4V)h6<2E
zjn`)kF@fSlif!x0-}IMQE)cERx89)xoa`RRg%GPNr9kMHKaO&%JfL_z8YH=v2)2mf
zt6!H;7apu1eIN8a?|}(SjW(_lnH;|>qL25LNbQYEFxl2yisfO08WC5`q7$?L3|0^*
z&7foVgo4lw8U$0eAq>Un_sS3Zr2b9s9cr@c<Cs>e8E$`I<JtcT4L@Ch{q@kQH$yP#
zN`jhifoI|d53z7qOB+)K%XrYrU@BWkjg9IwDn&Vf<1v_$pp<Y{BYmXFSG2srj!WG7
z35Fjz><^GW|K$OK%JNmK?2leZSx0dd{;Q<aMT{g~LI{gN^cmrM)T1G^lWCL+z*H@p
ztB^!smUb>5^PQ(SQs%)atc-m|&9mU)i(w10EGy`MhXIg92~A>DywQ6@<GwxV?bql%
zQBs0I7xX;p^PH%ZE@Iiia-*_JZR*~IApm%;W}o)Hk6<a$<a#7=un>K8Xc|xU(3zYY
z?pp@tl7s>1+Uc9yXEC%n0sXrEge6JBFd7(Z9-v!>2umCWYkMv8PxbYWk?!mhtxEW{
zlPvcKX#I|}Fp!0;wKRTUnS#_qG`sS;phrd+WjYJ721hNB<YpWoa>kR<J_QXmE#3+;
zMI^t%@?wmB;nB+4$vdX9G`9u1_yF4PwAn+J5`GC{%Qm8p%IVp7;L5fQ8#Z{RzQv#n
z%!4?AR1Zd%dghRo8C5+rdCA>UY%M$`CtxeWD+<Mch8CEcXh4ICFbvw%zegrtSZW8L
zBBcDB1%ws`Lktl_P(%oxpb@)5GXtGuM3K;VBUJL#w*yDq7B<OZ!D#=Y=|NaHa!}`<
zJtr^hW>%S?0!v!t5EKk0yf*>|@ZmxF-nM;v>y^s!zSa`hb&EnyJdU$#S8ZQ-3HF8%
z$XXSXZdN^?574fIA?=M-I{dwg$Wo1H=*Jl*oJ;5?-U(MH1q1kM2TD<z1pw0uiwBL~
zCjBi%6g;Ww#zR9g(Fn+wXdJ-`pFmp0pr`LhBJtXNVel+M<$+)3uPDa(3x>X`-IIE>
z7{Xm?yr~+3fE*TRTC%}V2~)g7wedXchY*n3CJ-9KPCUL$wKont<s^jW+bZl#SJ$#g
zEj0uNf^(M_uM5LQXc*1;SwUvx2M(z(Zh+CZ%_>A|ogU*JLk{2ESk8SfM%(4e7;r~F
zI%TP;0w$$WoH-jQ7aH=#uaoiyos|IVMF>;PG2;MC)HgpqF+pRQg8dzjpcOSQp&INS
zkSS(99T9hl&Mn9rhLfTnh~o_|i4&~^4-jna4|%&*F+au{d18<lO*;$4jgR7!+IPB9
z65uwU&=@lsQ+4oQ8^Ct#s5Vj!g1q6nN@{t9X_|*gru;8op1QC+1bue>_|rR<fB+(~
zG#aW&_SP94`tqfao{`j0wAmTy#u=9!)N|_j1<WF=eP{-wYAvEw)kTuXu%znbkIG$$
zs2)J=WbiH-W6%W{!#kL@V#4PNbq%j#JDpFs5NVrXaL+d8#|-CGvKj*Bn8I8@6VynL
zgSlelinCMi+pT2B$oAlTjQ1Um=%BWUCC-Gi?so@H8?07^aB6AKbTWsRAp}2$VKK@%
z=YYI{rLmfHKi1x1jB;xSsB1&p{PlJk#sOxuAANnM$a+K&V!bxDDz;qcc$TA#%|+P~
zciC1Pcoh?HwzU?weuOo(99l&jumv%0)HH+K`Z?wrPGksJ5}Hdz4+8(CF#<g$A7T^{
z$=Wc}stm1l(Mi#O0ANhJgdplhVRPzO$)c6>E$kHNL+8S{kQ?)tZ?k-@<f{OkoYs_@
z60UpSr+#9<m2(L_q%;;kVG@Z+x6g}#2-p%SpB)K6QA|P~D$N(`f5{<Bu8Q}nqG&H6
z<%&i)z&hWL={v@7?v(HGg_jKxs0HAdjdvU=yLj9o$O#yMo`hs@82Z~V&L<gon`mb0
zU!^yM&RNI)mhUiWPg{2A9dGm*nBG3d7+j^jh+ILl6j(eEw-6T&($qO>HzdsyTwZyP
zIHEWK1Mpjk*E?}0+W|GaZCg{O$Z!ISl2?C$>!cDMvmEi3P#HNM$*2rjufDj1Mo*xJ
z1b@99W(XD!JsJ8l3K7JjQ{Yb(ea>L?rlaWP1XiqXZ#PBT(ls6>jM-QQRkOVrLv2n1
z4;Z8m_4NrOdcA$NM@Xm`(dMN=2SzLGqBdZJrQ9B2z&Byc&a>r2|HyR*^iM|&LAMkF
z=&%WOK;O{xB!Le@_~L6kHYh03NDR?`kJm96mC%Nah(a?E$~;#KzA*6BfXrr+?a6JC
zZh`u{6%+neBx*fAxJcN#;mukW!b{#QUbXj6f`glrq@ctieP$bUpg7wDQ=d@wY8%MA
zo8G1&Tl0&cI^NUQTwu?60lYOev07KX$Bdk7Jbm?^vMC;(_^Zlc%B@#w`Cx5b4#R4H
zI~&G5n`|1>(BD;EdHCI+4Y$<!r`xrYp58o-Ne8Abn1;X`#a^wy{(RjGVN4KXJ)_%a
zHcd@-PenXK0I9^J9j$IjySCH*j&1a)1aH8gHo?y489$D#x(i9@oVW{0);!E^Fq-br
z#@ri2!~&E=HLg?6N{}4A@7*&l19`xSNCW#xt$O96#f$fVy-s$CmgQEX8u;dEK$%o+
zS?|s5&C;+0ns%U~48q)<a={TKI2y9RDnReghr>ef-il;5jE~ujbYH?CEIc30j)>4o
zd>@#ARE844-)f|4m7dd0{Q^WgwQ&S3OH*piwEq?PdK)l3>;M>#6+m4sn1J%4ad{=!
zUu7zPFlj;g$>AmG7-quH-pm0G9Ua+9@Bv`_g;;z4+37|rtSfz}9eutFkha>;Dla}5
zhsqIqyuhHNcZhuo$dlU4makm7g_4|kmAaR8deEq*EpijK`5m+}Z#IT+6R{?DMLWV#
zIGjrRL9P~J!h*K>3+gEb$<ZX{ec#6?3@AAiXq>;EJ^<oZCwo$=#0dmBRzw{U!mgs5
zy7_Ypwrph;RB`D?JVO9Vi4Z`XAjg*LWFI$8vZD!ns7j$jkdNhUNlA%;4hoGc)KQG#
z7P)A(p-Ko-t~weKxiGotctQ_V_DDA6V-Rt5dy0P{Hw>M<O`cMxu7R7O{zEOZ@(7D^
zmv86Idse;%UG7t3nN!|`^d}IQqOfmMJ4zsOys4}lyHeX~jXFsU$BmW`o$i`Q!8YQO
z^+zVh$Lbmz0}++9a(=*AJ=JL4&feG*@LoKvVX{-XM0{b8n;MXbPWcediJ_L7ypiN#
zR#&Gb<eK)6{Gd&zsp722Pnh$FoXd990}{er((p-IBI>J#?^+WBQxvbR<ikul8gK`*
zg;oG3;4o-y8t_6seXuXbA@<;Ig5l;%$1o&}Ua&|oV{KPzx;(~z{K4J?%c17>d$H!a
zE3uDgJ|J|aE1sTexNg+>z9>_&k(wfHQ|v5{){BN6D-0xuT10nwq%S26pap7*!Z@6A
zxT`T{ha2rp+IAO`e`(s8k&#h8^*%+s64kjf+Ok<Gx%+MIWM%EeOqm^mf<*>W=w3q=
z?)8LNiTl~yG~ue)_zM|;F@0(a>}VhyjzI{4nOi%-?3+1r2w%`#Nn*$le6)K!1u#&8
zB$qudBizC$0HRj+$7b7nbZ5uZL^z`oB81V30P}t@8PllLr^X6o-AzS+z6Lrg)v(1&
zusH@Oa)Z}6j)M|yDy>l~2=_kFXgMN(Y#A*0y(#7Q1}0dT%7|f?IR+I00;V^j#;cL+
zT4Ow6*IEzC*3uoI<oLyzvm<;ZCeU9#f5Czh;9V*x;b<4kS))#f7v&!tR+;H(NtZ_+
zy00$=D4~Hlh8J{<gPsAPQTJu;hsN%sx3+x}D!B{uEdYUfHzrj*MxUk}K%^4%Jv&U1
zZFIbpbRQKo@1{+gD$iqJ3ne=OgxE=JTUHN}6wQdFK}w|B(Ece7)To?p5%CR#_NR=o
zhZRwcx)|aB`NsOS8RWHg{|jA3=mqgH3(#BdGc~S54S;wJNgmM{LWs1^LO;n+L3?kn
z^~42d*hrDHgMf_Dys({Zbkb;cLtI=OZ@JWP-6iZh_6nBXRXAg4CKo?@FO){Z)XP&Q
z7`OM`L~pJDT0h<;DEP@Y%MEPP8I8homB19JjJh;GKvtGLDmx`ZEF1t$`A&!OZf!KD
z#t!(bcw{4y5bMG{C8Y~ZNqp_%*Eu;xT56>7jiwKF*GA*Cwu2Q)OISC9k@Oup@00cu
z?~8U0D3$~gi0LQ^RtN`@@p^(YrUpkZIj}Hy2du`#QEdMh8CU2|=}-y@pHfFAy;K_H
zwqk?(Le5862`ceGJd^-UJwiMPR|t?%>j*{dJPJpBKW^X*>PkX^=env|75ivJFb%sz
z5Q;M@SdTQF+c=UBWPKC<A{rt_l#(VIV=zr}p#7sLphp={Au8Hny!iah$Ic<}QjaO3
zCx&GOYqOVO+zE*HgP7MbZP~?Z&=_mRI3iV$$ka`W`bmSPHBrYehVQ8r2bR1*<r<BY
zX97lpuq-@5rO|qs+KHH0co^+0rg+4U=nA3PEVP$U-=gwxn=z}u+jUBd#!3=6#6l5*
z1I);?(<01+Kp*Ei<wCWM>(p3n8A_4%-Mx_WO|aIqNFdVUU}Z-U7J^WmteaAZ&>5+h
zDU&1XrOk^3gJvTpMRJ79NI3GsP>=06{vb=m;?<xLnU`kHnuTrvg;sza!p$U{;~glV
zNKO7UNj=4gOU3{>WaxrM7zGBfadhr#rXw%{wRh0eQWy@%j%|1E0G{5iw}DAL!NlCT
zObn=DYJ@gC&Cdo6u+0WA)~pqnjmGiaLKMNEBO)m<2C*2OB(|{Wq3krK`4wQa$VrET
zZAuvv-xhuT<#X9F1a~r9v5qc9fVeou?~1WJJJ4Am<P|}a{v5zs=Oc`_DH0UD3sOP}
z`Iv@b3fr|Qw;P%OTQniT?;+S!gPBS|SEO6>i@H_fhK=DL+>Tt>GcX_m-0GUJCP2p=
z^x~FXy9$?YS9}Gf;4S)BKQvx8L$Ti!$kG+U78M1sp~0Dza-X68T3{ZEDGF8U=+VHw
zyLlQ}^pr>PsN82Fe<xu8^}$F$PNpFON_h-Y2#sQbefMp6S2OCI3MAL(V3?F1{V;DC
zCughmr8opl!Z}K4%%g!)P=wClKw5YRZrQZyr3aVDJ9v$yr|}ntj_txLj+gPZD|s5x
z-EV{%;8`e8HwL-v0p@9Xma|?MUDXaz=@`;lxn<|hJcK%5@>V0AG=<e#fL^7cpfoZ^
zIYPbY82sFghER#jgyDA=q^Ct~a*I!^yuYKOWUaV04|@O;Z;I#+4Dp7`KQzj2()ZB7
ztdWsESD*0_xLdVTPH4k*NdS|0aC9gOdP?>F%87HLLeGwZuq~#BW_@`89Xke~<H;Yl
zxNP<6Gg!y%dKqc0HR-AFWxusbRqJi`eFbZy59TiU;LLNu`%c%O>9^jXLhFrUU1|`i
zM6vM&@q&X8`wdaV@Y?}O2?Hnyqihz2*Q^P!QWz9&d4U0TZ4gWp**Q5mhlOzJ(fmiK
zciGaVr;z$0Y6J0(gi)vvVpYOSNqRFSIA`r2-n2GZYe0OEv$gG48JfqaYdXK$J6A^s
z&ZgHiXaW6c)PdM#exNuo#~hV>8yFCpG+bT%ew``G*ijt+a0HvCJCS`Ta8khRPxt?K
zejK<Hxhjgr&kz@jh?m$P1Z%@>EBN@cbZEXff-d<=@5>uSu{G+O23m^tL(3=YI=p!1
zXV#&>D*^%_wqP4Z6rcy|4i9zyB*v&{H=&<q6caS(H8XC^1*_zist6g1lpr7hV-O(~
z=C>|ufrdT`!7Q_N;F!;DCeU@f`Q;%LlG}Fe`m)mYSKfIX9QN?vS+eVvfAzJWb^F~>
l|JQ6J1gZZ$=I~E)%kb}#p3yJ1pF}j~0r?-IzCZcX{{WGMfsOzG

literal 0
HcmV?d00001

diff --git a/run_command.py b/run_command.py
new file mode 100644
index 0000000..1d91877
--- /dev/null
+++ b/run_command.py
@@ -0,0 +1,64 @@
+import subprocess
+import time
+
+def run_bash_command(command: str, logger, windows: bool = False, blastn : bool = False):
+    """
+    Runs a command in the shell and logs the command, execution time, stdout, and stderr.
+
+    Args:
+        command (str): The command to be executed.
+        logger: The logger object used for logging.
+        windows (bool, optional): Specifies whether the environment is Windows (True) or UNIX (False). 
+                                  Defaults to False.
+
+    Returns:
+        tuple: A tuple containing the result object and the elapsed time in seconds.
+    """
+    if windows :
+        # Adjust the command for Windows environment
+        if not blastn : 
+            full_command = ("wsl " + command).replace('\\', '/')
+        else :
+            full_command =command.replace('\\', '/')
+    else:
+        # Use "bash -l -c" to run the command within the context of the bash profile on UNIX systems
+        full_command = f'bash -l -c "{command}"'
+
+    logger.info(f"Running command: {full_command}")
+    start_time = time.time()
+    result = subprocess.run(full_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+    elapsed_time = time.time() - start_time
+    logger.info(f"Command '{full_command}' took {elapsed_time:.2f} seconds to run.")
+    if result.stdout:
+        logger.info(result.stdout)
+        print(result.stdout)
+    if result.stderr:
+        logger.error("Error: " + result.stderr)
+        print(result.stderr)
+    return result, elapsed_time
+
+def run_command(command: str, logger, windows: bool = False):
+    """
+    Runs a command in the shell and logs the command, execution time, stdout, and stderr.
+
+    Args:
+        command (str): The command to be executed.
+        logger: The logger object used for logging.
+        windows (bool, optional): Specifies whether to run the command in Windows Subsystem for Linux (windows). 
+                              Defaults to False.
+
+    Returns:
+        tuple: A tuple containing the result object and the elapsed time in seconds.
+    """
+    if windows : 
+        command = ("wsl " + command).replace('\\','/')
+    logger.info(f"Running command: {command}")
+    start_time = time.time()
+    result = subprocess.run(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+    elapsed_time = time.time() - start_time
+    logger.info(f"Command '{command}' took {elapsed_time:.2f} seconds to run.")
+    if result.stdout:
+        logger.info(result.stdout)
+    if result.stderr:
+        logger.error("Error: " + result.stderr)
+    return result, elapsed_time
\ No newline at end of file
diff --git a/test_material/rbcL_Qiagen_tomato_5000_test_file.fastq b/test_material/rbcL_Qiagen_tomato_5000_test_file.fastq
new file mode 100644
index 0000000..c22edb2
--- /dev/null
+++ b/test_material/rbcL_Qiagen_tomato_5000_test_file.fastq
@@ -0,0 +1,8 @@
+@632f5334-2c26-4f91-a611-bbcc85d83ea1 runid=84dc383f16e4874203b6120bf223ea75d3815a54 read=14 ch=417 start_time=2021-11-26T13:56:48Z flow_cell_id=FAR79813 protocol_group_id=rbcL_Qiagen_tomato sample_id=no_sample
+GTATGCTTCGTTCATTCAAATTTGGGTGTTTAGCAATTTTCGCATTTATCGTGAAACGCTTTCGCGTTTTTGGATCGGCAGGTGTTTAACCGTTTTCGCATTTATCGTGGCTTAACACCAGCGTTTTTCATTGTAACTTAAAATTATATAAGAGAAGAAGAATCTTTGATTTTTTTTTTTGAAAAAGGTAACCGAGCTTCTTTAGTAATAAGACTATTCAAATTACAATATTCGTGGAAAATCGTAATAAATATTGAAGGCATCTTTTAATAGCGAAGTTTGAACAAAATTTCCAA
++
+$$%&$$%00*268%%)($$#####%++]<;7-$$#$#$257166>>8;7820.(''''')668.0+,-0.,(()''(,,++55]<<*')10024</8&.479;00023)(())&%%$%#$#%'02UFF=)((&$#(,,*%&20577C]]/-.]]-970,+05*+(..1049;:9853453+*/..-.*&*%%&%'/*7?<9=;&&&*/10-.1/1:?A:9:71008::B]]>=//.*,*(),/)+//00347%&'%#**&%()'&'(((()'***+*33373356*,+')),0,)&
+@c4124d21-64a3-4b8e-ac5a-771ea99ff56f runid=84dc383f16e4874203b6120bf223ea75d3815a54 read=704 ch=203 start_time=2021-11-26T14:00:32Z flow_cell_id=FAR79813 protocol_group_id=rbcL_Qiagen_tomato sample_id=no_sample
+GTTGTACTTCGTTCCAGTTATCAGATGTTGGGTGTTTAGCCGTTTTCGCATTTATCATTGAAACAACCGCGTTTTCGTGCGCCGCTTCACCTACAATGGAAGTAAACATATTGGTAACAGAACCTTTATGTAAAGGTCTAAAGGAGTGGCTACATAAGCAATATATTGATCTTTTCTCCAACAACGCGCTCGATGCGGTAGCATCGCACCTTTGTAACGATCAAGACTGGTAGAGTCCATCGGTCCATACAATTGTCCATGTACCAGTAGAAGATTCGGCTGCGGCCCCTGCTACTGGGTGGAACTCCAGGTTGAGGTTACTCGGAATGCTAATATATCAGTATCCTTGGTTTGGTACTCAGGAGTATAATAAGTCAATTTGTACTCTTTAACACCAGCTTTGAATCAACACTTTAGTCTCTG
++
+%84695532.,-.:+&''+'&(%'('&%$&&..]2233-,-58>DB]479;>9899)))..-)((-0'(7879<?<,+')]67767=:/.//-./2243632221/'&',.,&&((%&'().,+')'$'$##,04434-62060-('&$$())+7:7967<L]](*(>=5;8?A>:7@67=<663/.-3-3'.5=;2++;9:;99&&()'%*%%+++,,/>@9?@HABA334+)*&)+-.0/25:750**)(()23581100/0-/44=8]2@3333101334214778743.+'(%%.45<D<8:5/*6;9942*6062069(58762226-2/]]=>''':6:.,344095-+,*)())()(&&)--,.**,+.&%/7@>;2012441+*+))68123?=660-=&56')&522688<]1+
\ No newline at end of file
diff --git a/test_utils.py b/test_utils.py
new file mode 100644
index 0000000..cc5d326
--- /dev/null
+++ b/test_utils.py
@@ -0,0 +1,47 @@
+import process_fastq
+from Bio import SeqIO, Align
+
+#In order to run the tests, enter in terminal : pytest test_utils.py
+#Warning : the 3 last tests have an extremely small chance to fail due to functions based on randomness, running the tests several times can confirm if it is the reason or not
+
+def test_read_fastq():
+    filePath = "test_material/rbcL_Qiagen_tomato_5000_test_file.fastq"
+    reads = process_fastq.read_fastq(filePath)
+    assert len(reads) == 2
+    assert str(reads[0].seq) == "GTATGCTTCGTTCATTCAAATTTGGGTGTTTAGCAATTTTCGCATTTATCGTGAAACGCTTTCGCGTTTTTGGATCGGCAGGTGTTTAACCGTTTTCGCATTTATCGTGGCTTAACACCAGCGTTTTTCATTGTAACTTAAAATTATATAAGAGAAGAAGAATCTTTGATTTTTTTTTTTGAAAAAGGTAACCGAGCTTCTTTAGTAATAAGACTATTCAAATTACAATATTCGTGGAAAATCGTAATAAATATTGAAGGCATCTTTTAATAGCGAAGTTTGAACAAAATTTCCAA"
+    assert str(reads[1].seq) == "GTTGTACTTCGTTCCAGTTATCAGATGTTGGGTGTTTAGCCGTTTTCGCATTTATCATTGAAACAACCGCGTTTTCGTGCGCCGCTTCACCTACAATGGAAGTAAACATATTGGTAACAGAACCTTTATGTAAAGGTCTAAAGGAGTGGCTACATAAGCAATATATTGATCTTTTCTCCAACAACGCGCTCGATGCGGTAGCATCGCACCTTTGTAACGATCAAGACTGGTAGAGTCCATCGGTCCATACAATTGTCCATGTACCAGTAGAAGATTCGGCTGCGGCCCCTGCTACTGGGTGGAACTCCAGGTTGAGGTTACTCGGAATGCTAATATATCAGTATCCTTGGTTTGGTACTCAGGAGTATAATAAGTCAATTTGTACTCTTTAACACCAGCTTTGAATCAACACTTTAGTCTCTG"
+    
+def test_split_fastq():
+    inputPath="test_material/rbcL_Qiagen_tomato_5000_test_file.fastq"
+    outputDir="test_material"
+    output_name="outputTest"
+    assert process_fastq.split_fastq(inputPath,outputDir,output_name) == ('test_material\\outputTest_top20.fastq', 'test_material\\outputTest_remaining80.fastq')
+
+def test_quality_scores():
+    filePath = "test_material/rbcL_Qiagen_tomato_5000_test_file.fastq"
+    reads = process_fastq.getContentFile(filePath)
+    assert reads==['@632f5334-2c26-4f91-a611-bbcc85d83ea1 runid=84dc383f16e4874203b6120bf223ea75d3815a54 read=14 ch=417 start_time=2021-11-26T13:56:48Z flow_cell_id=FAR79813 protocol_group_id=rbcL_Qiagen_tomato sample_id=no_sample\n', 'GTATGCTTCGTTCATTCAAATTTGGGTGTTTAGCAATTTTCGCATTTATCGTGAAACGCTTTCGCGTTTTTGGATCGGCAGGTGTTTAACCGTTTTCGCATTTATCGTGGCTTAACACCAGCGTTTTTCATTGTAACTTAAAATTATATAAGAGAAGAAGAATCTTTGATTTTTTTTTTTGAAAAAGGTAACCGAGCTTCTTTAGTAATAAGACTATTCAAATTACAATATTCGTGGAAAATCGTAATAAATATTGAAGGCATCTTTTAATAGCGAAGTTTGAACAAAATTTCCAA\n', '+\n', "$$%&$$%00*268%%)($$#####%++]<;7-$$#$#$257166>>8;7820.(''''')668.0+,-0.,(()''(,,++55]<<*')10024</8&.479;00023)(())&%%$%#$#%'02UFF=)((&$#(,,*%&20577C]]/-.]]-970,+05*+(..1049;:9853453+*/..-.*&*%%&%'/*7?<9=;&&&*/10-.1/1:?A:9:71008::B]]>=//.*,*(),/)+//00347%&'%#**&%()'&'(((()'***+*33373356*,+')),0,)&\n", '@c4124d21-64a3-4b8e-ac5a-771ea99ff56f runid=84dc383f16e4874203b6120bf223ea75d3815a54 read=704 ch=203 start_time=2021-11-26T14:00:32Z flow_cell_id=FAR79813 protocol_group_id=rbcL_Qiagen_tomato sample_id=no_sample\n', 'GTTGTACTTCGTTCCAGTTATCAGATGTTGGGTGTTTAGCCGTTTTCGCATTTATCATTGAAACAACCGCGTTTTCGTGCGCCGCTTCACCTACAATGGAAGTAAACATATTGGTAACAGAACCTTTATGTAAAGGTCTAAAGGAGTGGCTACATAAGCAATATATTGATCTTTTCTCCAACAACGCGCTCGATGCGGTAGCATCGCACCTTTGTAACGATCAAGACTGGTAGAGTCCATCGGTCCATACAATTGTCCATGTACCAGTAGAAGATTCGGCTGCGGCCCCTGCTACTGGGTGGAACTCCAGGTTGAGGTTACTCGGAATGCTAATATATCAGTATCCTTGGTTTGGTACTCAGGAGTATAATAAGTCAATTTGTACTCTTTAACACCAGCTTTGAATCAACACTTTAGTCTCTG\n', '+\n', "%84695532.,-.:+&''+'&(%'('&%$&&..]2233-,-58>DB]479;>9899)))..-)((-0'(7879<?<,+')]67767=:/.//-./2243632221/'&',.,&&((%&'().,+')'$'$##,04434-62060-('&$$())+7:7967<L]](*(>=5;8?A>:7@67=<663/.-3-3'.5=;2++;9:;99&&()'%*%%+++,,/>@9?@HABA334+)*&)+-.0/25:750**)(()23581100/0-/44=8]2@3333101334214778743.+'(%%.45<D<8:5/*6;9942*6062069(58762226-2/]]=>''':6:.,344095-+,*)())()(&&)--,.**,+.&%/7@>;2012441+*+))68123?=660-=&56')&522688<]1+"]
+    quality = process_fastq.extractReadQuality(reads)
+    assert quality==["$$%&$$%00*268%%)($$#####%++]<;7-$$#$#$257166>>8;7820.(''''')668.0+,-0.,(()''(,,++55]<<*')10024</8&.479;00023)(())&%%$%#$#%'02UFF=)((&$#(,,*%&20577C]]/-.]]-970,+05*+(..1049;:9853453+*/..-.*&*%%&%'/*7?<9=;&&&*/10-.1/1:?A:9:71008::B]]>=//.*,*(),/)+//00347%&'%#**&%()'&'(((()'***+*33373356*,+')),0,)&", "%84695532.,-.:+&''+'&(%'('&%$&&..]2233-,-58>DB]479;>9899)))..-)((-0'(7879<?<,+')]67767=:/.//-./2243632221/'&',.,&&((%&'().,+')'$'$##,04434-62060-('&$$())+7:7967<L]](*(>=5;8?A>:7@67=<663/.-3-3'.5=;2++;9:;99&&()'%*%%+++,,/>@9?@HABA334+)*&)+-.0/25:750**)(()23581100/0-/44=8]2@3333101334214778743.+'(%%.45<D<8:5/*6;9942*6062069(58762226-2/]]=>''':6:.,344095-+,*)())()(&&)--,.**,+.&%/7@>;2012441+*+))68123?=660-=&56')&522688<]1"]
+    converted_quality = process_fastq.convertQuality(quality)
+    assert converted_quality==[3, 3, 4, 5, 3, 3, 4, 15, 15, 9, 17, 21, 23, 4, 4, 8, 7, 3, 3, 2, 2, 2, 2, 2, 4, 10, 10, 60, 27, 26, 22, 12, 3, 3, 2, 3, 2, 3, 17, 20, 22, 16, 21, 21, 29, 29, 23, 26, 22, 23, 17, 15, 13, 7, 6, 6, 6, 6, 6, 8, 21, 21, 23, 13, 15, 10, 11, 12, 15, 13, 11, 7, 7, 8, 6, 6, 7, 11, 11, 10, 10, 20, 20, 60, 27, 27, 9, 6, 8, 16, 15, 15, 17, 19, 27, 14, 23, 5, 13, 19, 22, 24, 26, 15, 15, 15, 17, 18, 8, 7, 7, 8, 8, 5, 4, 4, 3, 4, 2, 3, 2, 4, 6, 15, 17, 52, 37, 37, 28, 8, 7, 7, 5, 3, 2, 7, 11, 11, 9, 4, 5, 17, 15, 20, 22, 22, 34, 60, 60, 14, 12, 13, 60, 60, 12, 24, 22, 15, 11, 10, 15, 20, 9, 10, 7, 13, 13, 16, 15, 19, 24, 26, 25, 24, 23, 20, 18, 19, 20, 18, 10, 9, 14, 13, 13, 12, 13, 9, 5, 9, 4, 4, 5, 4, 6, 14, 9, 22, 30, 27, 24, 28, 26, 5, 5, 5, 9, 14, 16, 15, 12, 13, 16, 14, 16, 25, 30, 32, 25, 24, 25, 22, 16, 15, 15, 23, 25, 25, 33, 60, 60, 29, 28, 14, 14, 13, 9, 11, 9, 7, 8, 11, 14, 8, 10, 14, 14, 15, 15, 18, 19, 22, 4, 5, 6, 4, 2, 9, 9, 5, 4, 7, 8, 6, 5, 6, 7, 7, 7, 7, 8, 6, 9, 9, 9, 10, 9, 18, 18, 18, 22, 18, 18, 20, 21, 9, 11, 10, 6, 8, 8, 11, 15, 11, 8, 5, 4, 23, 19, 21, 24, 20, 20, 18, 17, 13, 11, 12, 13, 25, 10, 5, 6, 6, 10, 6, 5, 7, 4, 6, 7, 6, 5, 4, 3, 5, 5, 13, 13, 60, 17, 17, 18, 18, 12, 11, 12, 20, 23, 29, 35, 33, 60, 19, 22, 24, 26, 29, 24, 23, 24, 24, 8, 8, 8, 13, 13, 12, 8, 7, 7, 12, 15, 6, 7, 22, 23, 22, 24, 27, 30, 27, 11, 10, 6, 8, 60, 21, 22, 22, 21, 22, 28, 25, 14, 13, 14, 14, 12, 13, 14, 17, 17, 19, 18, 21, 18, 17, 17, 17, 16, 14, 6, 5, 6, 11, 13, 11, 5, 5, 7, 7, 4, 5, 6, 7, 8, 13, 11, 10, 6, 8, 6, 3, 6, 3, 2, 2, 11, 15, 19, 19, 18, 19, 12, 21, 17, 15, 21, 15, 12, 7, 6, 5, 3, 3, 7, 8, 8, 10, 22, 25, 22, 24, 21, 22, 27, 43, 60, 60, 7, 9, 7, 29, 28, 20, 26, 23, 30, 32, 29, 25, 22, 31, 21, 22, 28, 27, 21, 21, 18, 14, 13, 12, 18, 12, 18, 6, 13, 20, 28, 26, 17, 10, 10, 26, 24, 25, 26, 24, 24, 5, 5, 7, 8, 6, 4, 9, 4, 4, 10, 10, 10, 11, 11, 14, 29, 31, 24, 30, 31, 39, 32, 33, 32, 18, 18, 19, 10, 8, 9, 5, 8, 10, 12, 13, 15, 14, 17, 20, 25, 22, 20, 15, 9, 9, 8, 7, 7, 8, 17, 18, 20, 23, 16, 16, 15, 15, 14, 15, 12, 14, 19, 19, 28, 23, 60, 17, 31, 18, 18, 18, 18, 16, 15, 16, 18, 18, 19, 17, 16, 19, 22, 22, 23, 22, 19, 18, 13, 10, 6, 7, 4, 4, 13, 19, 20, 27, 35, 27, 23, 25, 20, 14, 9, 21, 26, 24, 24, 19, 17, 9, 21, 15, 21, 17, 15, 21, 24, 7, 20, 23, 22, 21, 17, 17, 17, 21, 12, 17, 14, 60, 60, 28, 29, 6, 6, 6, 25, 21, 25, 13, 11, 18, 19, 19, 15, 24, 20, 12, 10, 11, 9, 8, 7, 8, 8, 7, 8, 7, 5, 5, 8, 12, 12, 11, 13, 9, 9, 11, 10, 13, 5, 4, 14, 22, 31, 29, 26, 17, 15, 16, 17, 19, 19, 16, 10, 9, 10, 8, 8, 21, 23, 16, 17, 18, 30, 28, 21, 21, 15, 12, 28, 5, 20, 21, 6, 8, 5, 20, 17, 17, 21, 23, 23, 27, 60, 16]
+
+def test_create_random_sequence():
+    seed=1
+    random_seq = process_fastq.create_random_sequence(50,1)
+    assert random_seq == "TAGACCCCTACACCACGTAGAAAACTCATCCTGTTCGACATGAGCTGGCC"
+
+def test_mutation():
+    sequence = "ATCG"
+    mutated_sequence = process_fastq.damage_sequence(sequence, mutation_rate=1.0)
+    assert mutated_sequence != sequence
+
+def test_deletion():
+    sequence = "ATCG"
+    deleted_sequence = process_fastq.damage_sequence(sequence, deletion_rate=1.0)
+    assert deleted_sequence != sequence
+
+def test_insertion():
+    sequence = "ATCG"
+    inserted_sequence = process_fastq.damage_sequence(sequence, insertion_rate=0.98)
+    assert inserted_sequence != sequence
\ No newline at end of file

From a6a1eb926f0a4b0a9e6d88154e42267dd73ca231 Mon Sep 17 00:00:00 2001
From: ykeren6 <yair.keren@epfl.ch>
Date: Wed, 10 Apr 2024 18:20:28 +0200
Subject: [PATCH 2/9] unitests coded and docstrings formated

---
 test_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_utils.py b/test_utils.py
index cc5d326..8b85ef7 100644
--- a/test_utils.py
+++ b/test_utils.py
@@ -28,7 +28,7 @@ def test_quality_scores():
 
 def test_create_random_sequence():
     seed=1
-    random_seq = process_fastq.create_random_sequence(50,1)
+    random_seq = process_fastq.create_random_sequence(50,seed)
     assert random_seq == "TAGACCCCTACACCACGTAGAAAACTCATCCTGTTCGACATGAGCTGGCC"
 
 def test_mutation():

From cdb0a990776adc919e20f004ea2f61e847f12786 Mon Sep 17 00:00:00 2001
From: ykeren6 <yair.keren@epfl.ch>
Date: Wed, 10 Apr 2024 18:32:42 +0200
Subject: [PATCH 3/9] docstrings formated for access_genbank.py

---
 access_genbank.py | 62 +++++++++++++++++++++++------------------------
 1 file changed, 30 insertions(+), 32 deletions(-)

diff --git a/access_genbank.py b/access_genbank.py
index 32ef2a8..88b1c6c 100644
--- a/access_genbank.py
+++ b/access_genbank.py
@@ -9,19 +9,18 @@
 #-------------------------------
 
 def build_search_term(gene_name, min_len=0, max_len=-1):
-    """ generate a search query for NCBI's nucleotide database (GenBank). Bio.Entrez does not provide filters such as sequence length and gene name
+    """ 
+    Generate a search query for NCBI's nucleotide database (GenBank). Bio.Entrez does not provide filters such as sequence length and gene name
     so a search query keywords must be used instead.
     Used to make database queries in extract_database and download_database
     
-    Parameters
-    ----------
-        gene_name: (str) name of the gene of interest
-        min_len: (int, optional) lower bound of the sequence length filter, by default 0
-        max_len: (int, optional) upper bound of the sequence length filter, by default -1 means no limit
+    Parameters: 
+        gene_name(str): name of the gene of interest
+        min_len(int, optional): lower bound of the sequence length filter, by default 0
+        max_len(int, optional): upper bound of the sequence length filter, by default -1 means no limit
     
-    Returns
-    ----------
-        term: (str) search query
+    Returns:
+        term(str): search query
     """
     term = f"{gene_name}[Gene Name]"
     if max_len == -1:
@@ -32,17 +31,16 @@ def build_search_term(gene_name, min_len=0, max_len=-1):
 
 
 def extract_database(gene_name, seqlen_start=0, seqlen_stop=-1):
-    '''Request data on GenBank from NCBI (with Entrez module), with filtering options
+    '''
+    Request data on GenBank from NCBI (with Entrez module), with filtering options
 
-    Parameters
-    ----------
-        gene_name: (str) name of the gene to extract from GenBank
-        seqlen_start: (int, optional) lower bound of the sequence length filter, by default 0
-        seqlen_stop: (int, optional) upper bound of the sequence length filter, by default -1 means no limit
+    Parameters:
+        gene_name(str): name of the gene to extract from GenBank
+        seqlen_start(int, optional): lower bound of the sequence length filter, by default 0
+        seqlen_stop(int, optional): upper bound of the sequence length filter, by default -1 means no limit
 
-    Returns
-    ----------
-        seq_record: (str) raw data of all sequences
+    Returns:
+        seq_record(str): raw data of all sequences
     '''
     handle = Entrez.esearch(db= "nucleotide", term= build_search_term(gene_name, seqlen_start, seqlen_stop), retmax = 10000)
     record = Entrez.read(handle)
@@ -60,15 +58,16 @@ def extract_database(gene_name, seqlen_start=0, seqlen_stop=-1):
 
 
 def download_database(gene_name, seqlen_start=0, seqlen_stop=-1):
-    '''Download data to a .fasta file, with filtering options
+    '''
+    Download data to a .fasta file, with filtering options
 
     Args:
-        gene_name: (str) name of the gene to extract from GenBank
-        seqlen_start: (int) lower bound of the sequence length filter
-        seqlen_stop: (int) upper bound of the sequence length filter
+        gene_name(str): name of the gene to extract from GenBank
+        seqlen_start(int): lower bound of the sequence length filter
+        seqlen_stop(int): upper bound of the sequence length filter
 
     Returns:
-        data_path: (str) path to downloaded data .fasta file
+        data_path(str): path to downloaded data .fasta file
     '''
     data_path = f"{gene_name}[{seqlen_start},{seqlen_stop}].fasta"
     with open(data_path, mode="w") as file:
@@ -77,17 +76,16 @@ def download_database(gene_name, seqlen_start=0, seqlen_stop=-1):
 
 def download_sequence(species, gene_name, dst, start_length=None, stop_length= None, id = None, permissive_search = True):
     """
-    download sequence from GenBank through the Entrez database. 
+    Download sequence from GenBank through the Entrez database. 
 
     Parameters:
-    ----------
-    species(str): name of species
-    gene_name(str): name of gene
-    dst(str,Path-like): destination file path
-    start_length(int): minimum length of sequence
-    stop_length(int): maximum length of sequence
-    id(list): list of NCBi ids of sequences to download. If provided, overrides gene_name and species.
-    permissive_search(bool, default = True): when True, if Advanced NCBI query returns nothing, replace it with a less precise general query.
+        species(str): name of species
+        gene_name(str): name of gene
+        dst(str,Path-like): destination file path
+        start_length(int): minimum length of sequence
+        stop_length(int): maximum length of sequence
+        id(list): list of NCBi ids of sequences to download. If provided, overrides gene_name and species.
+        permissive_search(bool, default = True): when True, if Advanced NCBI query returns nothing, replace it with a less precise general query.
     """
     
     if id == None:

From d9e1dd82e15acc6c693864b6ed03e6ea97a0a01d Mon Sep 17 00:00:00 2001
From: Ghassan Abboud <ghassan.abboud@epfl.ch>
Date: Tue, 2 Apr 2024 14:12:39 +0300
Subject: [PATCH 4/9] add quality_scores functions

---
 process_fastq.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/process_fastq.py b/process_fastq.py
index 7b794a8..7f45a26 100644
--- a/process_fastq.py
+++ b/process_fastq.py
@@ -2,11 +2,11 @@
 import numpy as np
 from Bio import SeqIO, Align
 import os.path as ospath
+
 import os
 import gzip
 import shutil
-import sys 
-import matplotlib.pyplot as plt
+import sys import matplotlib.pyplot as plt
 
 
 #----------------------------
@@ -33,11 +33,8 @@ def extract_gz(src_file, dst_file):
 def read_fastq(fastq_filepath=None):
     """
     Read a fastq file and return the reads.
-    
-    Args:
-        fastq_filepath (str): filepath of the .fastq file
-    Returns: 
-        reads(list of Bio.SeqRecord.SeqRecord): reads from the fastq file
+    :param fastq_filepath: filepath of the .fastq file [str]
+    :return: reads from the fastq file [list of Bio.SeqRecord.SeqRecord]
     """
     if fastq_filepath is None: fastq_filepath = "data/rbcL_Qiagen_tomato.fastq" # default path (example)
     if fastq_filepath.lower().endswith('.gz'):

From 67163147926a56618a0e482845ea5b4cb7e76908 Mon Sep 17 00:00:00 2001
From: ghassanabboud <ghassan.abboud@epfl.ch>
Date: Sun, 7 Apr 2024 15:55:40 +0200
Subject: [PATCH 5/9] fix process_fastq import

---
 process_fastq.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/process_fastq.py b/process_fastq.py
index 7f45a26..48209c0 100644
--- a/process_fastq.py
+++ b/process_fastq.py
@@ -6,7 +6,8 @@
 import os
 import gzip
 import shutil
-import sys import matplotlib.pyplot as plt
+import sys 
+import matplotlib.pyplot as plt
 
 
 #----------------------------

From c78b2aa39b21ca92861d06109cafefdd0e07389d Mon Sep 17 00:00:00 2001
From: ghassanabboud <ghassan.abboud@epfl.ch>
Date: Sun, 7 Apr 2024 16:10:06 +0200
Subject: [PATCH 6/9] update xstract_fastq

---
 process_fastq.py | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/process_fastq.py b/process_fastq.py
index 48209c0..ec680cb 100644
--- a/process_fastq.py
+++ b/process_fastq.py
@@ -110,7 +110,7 @@ def get_file_iterator(filename):
             break
 
 
-def extract_fastq(main_dir, sample_nb, dst):
+def extract_fastq(main_dir, barcode_nb, dst):
     """
     Extract all fastq files under a certain barcode/sample number from an expedition results folder.
 
@@ -121,16 +121,23 @@ def extract_fastq(main_dir, sample_nb, dst):
     Returns:
         None
     """
+    search_dirs=[]
     for root, dirs, files in os.walk(main_dir):
         if "fastq_pass" in dirs:
-            pass_dir = ospath.join(root, "fastq_pass")
-            break
-    for root, dirs, files in os.walk(pass_dir):
-        if f"barcode{sample_nb}" in dirs:
-            fastq_dir = ospath.join(root, f"barcode{sample_nb}")
-        elif f"barcode0{sample_nb}" in dirs:
-            fastq_dir = ospath.join(root, f"barcode0{sample_nb}")
-    concatenate_fastq(fastq_dir, dst)
+            search_dirs.append(ospath.join(root, "fastq_pass"))
+        if "barcoding" in dirs:
+            search_dirs.append(ospath.join(root, "barcoding"))
+        
+    for search_dir in search_dirs:
+        for root, dirs, files in os.walk(search_dir):
+            if f"barcode{barcode_nb}" in dirs:
+                fastq_dir = ospath.join(root, f"barcode{barcode_nb}")
+                print(fastq_dir)
+                concatenate_fastq(fastq_dir, dst)
+            elif f"barcode0{barcode_nb}" in dirs:
+                fastq_dir = ospath.join(root, f"barcode0{barcode_nb}")
+                print(fastq_dir)
+                concatenate_fastq(fastq_dir, dst)
 
 #----------------------------------
 #GET AND VISUALIZE QUALITY SCORES

From 8933b9a2bcbfee85261debd6a830b778c4447f7e Mon Sep 17 00:00:00 2001
From: ykeren6 <yair.keren@epfl.ch>
Date: Wed, 3 Apr 2024 14:42:34 +0200
Subject: [PATCH 7/9] added uniformed docstrings and pytests

---
 process_fastq.py | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/process_fastq.py b/process_fastq.py
index ec680cb..c7fc7d6 100644
--- a/process_fastq.py
+++ b/process_fastq.py
@@ -34,8 +34,11 @@ def extract_gz(src_file, dst_file):
 def read_fastq(fastq_filepath=None):
     """
     Read a fastq file and return the reads.
-    :param fastq_filepath: filepath of the .fastq file [str]
-    :return: reads from the fastq file [list of Bio.SeqRecord.SeqRecord]
+    
+    Args:
+        fastq_filepath (str): filepath of the .fastq file
+    Returns: 
+        reads(list of Bio.SeqRecord.SeqRecord): reads from the fastq file
     """
     if fastq_filepath is None: fastq_filepath = "data/rbcL_Qiagen_tomato.fastq" # default path (example)
     if fastq_filepath.lower().endswith('.gz'):
@@ -110,7 +113,7 @@ def get_file_iterator(filename):
             break
 
 
-def extract_fastq(main_dir, barcode_nb, dst):
+def extract_fastq(main_dir, sample_nb, dst):
     """
     Extract all fastq files under a certain barcode/sample number from an expedition results folder.
 
@@ -121,23 +124,16 @@ def extract_fastq(main_dir, barcode_nb, dst):
     Returns:
         None
     """
-    search_dirs=[]
     for root, dirs, files in os.walk(main_dir):
         if "fastq_pass" in dirs:
-            search_dirs.append(ospath.join(root, "fastq_pass"))
-        if "barcoding" in dirs:
-            search_dirs.append(ospath.join(root, "barcoding"))
-        
-    for search_dir in search_dirs:
-        for root, dirs, files in os.walk(search_dir):
-            if f"barcode{barcode_nb}" in dirs:
-                fastq_dir = ospath.join(root, f"barcode{barcode_nb}")
-                print(fastq_dir)
-                concatenate_fastq(fastq_dir, dst)
-            elif f"barcode0{barcode_nb}" in dirs:
-                fastq_dir = ospath.join(root, f"barcode0{barcode_nb}")
-                print(fastq_dir)
-                concatenate_fastq(fastq_dir, dst)
+            pass_dir = ospath.join(root, "fastq_pass")
+            break
+    for root, dirs, files in os.walk(pass_dir):
+        if f"barcode{sample_nb}" in dirs:
+            fastq_dir = ospath.join(root, f"barcode{sample_nb}")
+        elif f"barcode0{sample_nb}" in dirs:
+            fastq_dir = ospath.join(root, f"barcode0{sample_nb}")
+    concatenate_fastq(fastq_dir, dst)
 
 #----------------------------------
 #GET AND VISUALIZE QUALITY SCORES

From a23092f7b4d5f6e3ffd70703f9686a42acbabb9f Mon Sep 17 00:00:00 2001
From: ykeren6 <yair.keren@epfl.ch>
Date: Fri, 10 May 2024 11:49:58 +0200
Subject: [PATCH 8/9] added fake fastq generation to repo

---
 create_fake_dataset.py |  34 +++++++
 generate_fastq.py      | 214 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 248 insertions(+)
 create mode 100644 create_fake_dataset.py
 create mode 100644 generate_fastq.py

diff --git a/create_fake_dataset.py b/create_fake_dataset.py
new file mode 100644
index 0000000..a343afe
--- /dev/null
+++ b/create_fake_dataset.py
@@ -0,0 +1,34 @@
+from access_genbank import download_sequence_from_id
+from generate_fastq import generate_fastq, multiplex
+import os
+import os.path as ospath
+import argparse
+from Bio import SeqIO
+
+def main():
+
+    destination_folder = "test_fake4"
+    ids= ["ON409953.1", "AF288129.1", "AF288126.1", "AF288125.1" ]
+    multiplexed = True
+
+
+
+    destination_folder_path= ospath.join("data", destination_folder)
+    if not ospath.exists(destination_folder_path):
+        os.makedirs(destination_folder_path)
+
+    record_parser = download_sequence_from_id(ids)
+
+    for id, record in zip(ids,record_parser):
+        new_demultiplexed_fastq_path = ospath.join(destination_folder_path, str(id)+".fastq")
+        generate_fastq(record.seq, 100, new_demultiplexed_fastq_path)
+    print("left for loop")
+    if multiplexed:
+        multiplex_path=ospath.join(destination_folder_path,"multiplexed.fastq")
+        multiplex([destination_folder_path],multiplex_path)
+
+    for record in SeqIO.parse(multiplex_path,"fastq"):
+        print(record)
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/generate_fastq.py b/generate_fastq.py
new file mode 100644
index 0000000..272665e
--- /dev/null
+++ b/generate_fastq.py
@@ -0,0 +1,214 @@
+from Bio import SeqIO
+import random
+from Bio.Seq import Seq
+import numpy as np
+import os
+import os.path as ospath
+import numpy as np
+import scipy.stats
+import matplotlib.pyplot as plt
+
+
+def generate_weighted_sequence_variant(sequence, weights=[0.25, 0.25, 0.25, 0.25]):
+    """
+    Flips an initial sequence as it could happen when using the MinIon tool
+
+    Arguments:
+    sequence (Seq): sequence of interest
+    weights(list of 4 float): probability of each sequence version (initial sequence, reverse complement, complement, reverse initial)
+
+    Returns:
+        selected_variant(Seq): randomly reversed sequence
+        variant_description(str): string indicating which variant is selected
+    """
+    variants = ["initial_sequence", "reverse_complement", "complement", "reverse_initial"]
+    variant_sequences = [sequence, sequence.reverse_complement(), sequence.complement(), sequence[::-1]]
+    
+    selected_variant_index = random.choices(range(len(variants)), weights=weights, k=1)[0]
+    selected_variant = variant_sequences[selected_variant_index]
+    variant_description = variants[selected_variant_index]
+
+    return selected_variant, variant_description
+
+
+
+def break_prob_function(position, sequence_length):
+    """
+    Example of probability function usable for the previous function
+
+    Arguments:
+        position(int): position in sequence
+        sequence_length(int)
+    Returns:
+        probability at a specific position in sequence(float)
+    """
+    max_prob = 0.5  # Max probability at start and end of sequence
+    return max_prob * (position / sequence_length)
+
+def bimodal_distribution(position,sequence_length):
+    """
+    Probability density function of a bimodal normal distribution, highest chance is centered around positions at one or two thirds of sequence length
+
+    Parameters
+    ----------
+    position: position in the sequence for which probability of event is calculated
+    sequence_length: length of sequence
+
+    Returns
+    ---------
+    probability of event, between 0 and 1
+    """
+    peaks = [sequence_length/3, 2*sequence_length/3]
+    return 0.5* scipy.stats.norm(peaks[0],sequence_length/15).pdf(position) + 0.5*scipy.stats.norm(peaks[1],sequence_length/15).pdf(position)
+
+
+def break_sequence_with_probability(sequence, break_prob_function = bimodal_distribution, nbreaks = 2, take_longest= False):
+    """
+    Simulates breakages in the sequence, which could happen using the MinIon tool
+
+    Arguments:
+        sequence(Seq): sequence of interest
+        break_prob_funtion(function): probability function used as weight for the breakages
+
+    Returns:
+        final_sequence (Seq): final sequence after breakages
+        break_info (dict): dictionary containing information about the breakages
+    """
+    new_sequence=sequence
+    break_info = {'number_of_breaks': 0, 'part_taken': ''}
+    
+    for i, base in enumerate(sequence):
+        # Computes the breakage probability
+        break_prob = break_prob_function(i, len(sequence))
+
+        # Checks if sequence breaks at this iteration
+        if random.random() <= break_prob:
+            if nbreaks > 0:
+                new_sequence = new_sequence[:i+break_info["number_of_breaks"]] +"N" +new_sequence[i+break_info["number_of_breaks"]:]
+                nbreaks -= 1   # Breakage marker is 'N'
+                break_info['number_of_breaks'] += 1
+    broken_parts = new_sequence.split("N")
+    if take_longest:
+        length_index = lambda index: len(broken_parts[index])
+        final_seq_index = max(range(len(broken_parts)), key=length_index)
+    else:
+        final_seq_index = random.choice(range(len(broken_parts)))
+    
+    final_seq=broken_parts[final_seq_index]
+    break_info["part_taken"] = final_seq_index
+    
+    return final_seq, break_info
+
+
+def mutate(base):
+    """
+    Simulates substitution mutation
+
+    Arguments:
+        base(char)
+
+    Returns:
+        muted base (char)
+    """
+    # Bases list
+    bases = ['A', 'T', 'C', 'G']
+    if base not in bases:
+        raise ValueError("Invalid base provided. Base must be one of 'A', 'T', 'C', or 'G'.")
+    bases.remove(base)
+    # Randomly selected new base
+    return random.choice(bases)
+
+
+def assign_quality_scores(sequence, mutation_probability=0.1,mutation_mean = 16,mutation_sd = 3,basic_mean = 48,basic_sd = 3):
+    """
+    Assigns fake scores for each base, following a different normal law if base is mutated or not
+    
+    Arguments:
+        sequence(Seq): sequence of interest
+        mutation_probability(float)
+        mutation_mean(int): mean of normal law associated with mutation
+        mutation_sd(int): standard deviation of normal law associated with mutation
+        basic_mean(int): mean of normal law associated with no-change bases
+        mutation_mean(int): standard deviation of normal law associated with no-change bases
+    
+    Returns:
+        quality score of sequence (in base 64 characters -> ASCII)
+        nb_mutations(int): number of mutations
+        mutations_positions(array of ints): positions of mutations
+    """
+    quality_scores = []
+    base64_table = """!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~"""
+    nb_mutations=0
+    mutations_positions= []
+    for i, base in enumerate(sequence):
+        # Computes score for each base depending on mutation
+        if random.random() < mutation_probability:
+            # If mutation needed
+            base = mutate(base)
+            nb_mutations+=1
+            mutations_positions.append(i+1)
+            quality_score = np.random.normal(mutation_mean, mutation_sd)
+        else:
+            # If no mutation needed
+            quality_score = np.random.normal(basic_mean, basic_sd)
+        # Limits the score between 0 and 93 (ASCII go from 33 to 126)
+        quality_score = max(min(quality_score, 93), 0)
+        base64_score = base64_table[int(round(quality_score))]
+        quality_scores.append(base64_score)
+    return ''.join(quality_scores), nb_mutations, mutations_positions
+
+def generate_fastq(input_seq, n_reads, dst, variants_weights= [0.25, 0.25,0.25,0.25], break_prob_function=break_prob_function, mutation_prob =0.1, quality_score_params= [10,2,50,5], nbreaks=2, take_longest=False):
+
+    with open(dst, "a") as handle: # Operations on newly created or updated file
+        for i in range(n_reads) :
+            seq, orientation = generate_weighted_sequence_variant(input_seq,variants_weights)
+            broken_seq, break_info = break_sequence_with_probability(seq,break_prob_function, nbreaks,take_longest)
+            quality_string, n_mutations,mutations_positions = assign_quality_scores(broken_seq,mutation_prob,quality_score_params[0],quality_score_params[1],quality_score_params[2],quality_score_params[3])
+
+            variant_str = "Orientation_of_sequence="+orientation+" "
+            breakage_str = "Number_of_breakages="+str(break_info['number_of_breaks'])+" Part_taken="+str(break_info['part_taken'])+" "
+            mutations_str = "Number_of_mutations="+str(n_mutations)+" Positions="+str(mutations_positions)
+
+            handle.write(f"@read{i} "+variant_str+breakage_str+mutations_str+'\n') #writes the sequence id
+            handle.write(str(broken_seq)+ '\n') #writes the sequence generated by transforming it into string
+            handle.write("+\n") #writes the + separator
+            handle.write(quality_string + '\n') #writes the quality score
+
+def multiplex(folder_paths: list, dst: str):
+    """
+    merge multiple fastq files into one, while writing in the description the origin of each read
+
+    Parameters
+    ----------
+    folder_paths: list of folder paths to look for fastq files for merging
+    dst: path of the output merged fastq file
+
+    """
+    parsers=dict()
+    for folder_path in folder_paths:
+        for root,_,files in os.walk(folder_path):
+            for file in files:
+                if file.endswith(".fastq"):
+                    file_path = ospath.join(root,file)
+                    parsers[file_path] = list(SeqIO.parse(file_path, "fastq"))
+
+    with open(dst, "a") as writer:
+        print("inside writing")
+        while parsers:
+            parent_file_path = random.choice(list(parsers.keys()))
+            current_list=parsers[parent_file_path]
+            index= random.randint(0, len(current_list)-1)
+            seqrecord = current_list[index]
+            seqrecord.description = seqrecord.description + " source=" + parent_file_path
+            SeqIO.write(seqrecord, writer, "fastq")
+            current_list.pop(index)
+            print("index popped")
+            if not current_list:
+                print("list popped")
+                parsers.pop(parent_file_path)
+
+
+
+
+
+

From c2b45795173da99cbeae694ecf656f01db6174ce Mon Sep 17 00:00:00 2001
From: ykeren6 <yair.keren@epfl.ch>
Date: Fri, 10 May 2024 11:57:03 +0200
Subject: [PATCH 9/9] added fake fastq generation to repo

---
 access_genbank.py | 83 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 59 insertions(+), 24 deletions(-)

diff --git a/access_genbank.py b/access_genbank.py
index 88b1c6c..d045394 100644
--- a/access_genbank.py
+++ b/access_genbank.py
@@ -74,43 +74,78 @@ def download_database(gene_name, seqlen_start=0, seqlen_stop=-1):
         file.write(extract_database(gene_name, seqlen_start, seqlen_stop))
     return data_path
 
-def download_sequence(species, gene_name, dst, start_length=None, stop_length= None, id = None, permissive_search = True):
+def download_sequence_from_id(id: list, dst = None):
+    """
+    download sequence from GenBank through the Entrez database using unique identifiers.
+
+    Args:
+        id (list): list of IDs of sequences to download from GenBank
+        dst(str): destination file path
+
+    Returns:
+        Bio.SeqIO.FastaIO.FastaIterator: iterator over the records corresponding to the input ids
+    
+    """
+    handle = Entrez.efetch(db="nucleotide", id=id, retmode = "fasta", rettype = "fasta")
+    record_parser = SeqIO.parse(handle, "fasta")
+    if dst != None:
+        with open(dst, mode="a") as writer:
+            SeqIO.write(record_parser,writer,"fasta")
+        return SeqIO.parse(dst,"fasta")
+    return record_parser
+
+def get_species_from_ids(ids:list):
+    """
+    Return the species from GenBank entries with certain ids.
+
+    Parameters:
+        ids: list of ids to search
+
+    Returns:
+        dictionary that matches ids to organism names
+    
+    """
+    species_dict= dict()
+    for id in ids:
+        handle = Entrez.efetch(db="nucleotide", id=id, retmode = "gb", rettype = "gb")
+        records = SeqIO.parse(handle,"genbank")
+        one_record = next(records)
+        species_dict[id]=one_record.annotations["organism"]
+    return species_dict
+
+def download_sequence(species, gene_name, dst = None, start_length=None, stop_length= None, permissive_search = True):
     """
     Download sequence from GenBank through the Entrez database. 
 
     Parameters:
         species(str): name of species
         gene_name(str): name of gene
-        dst(str,Path-like): destination file path
+        dst(str,Path-like): destination file path, if None does not write to a file, only returns 
         start_length(int): minimum length of sequence
         stop_length(int): maximum length of sequence
         id(list): list of NCBi ids of sequences to download. If provided, overrides gene_name and species.
         permissive_search(bool, default = True): when True, if Advanced NCBI query returns nothing, replace it with a less precise general query.
+    
+    Returns:
+        Bio.SeqRecord.SeqRecord: sequence found from GenBank
+
     """
     
-    if id == None:
-        search_term = f"{gene_name}[Gene Name] AND {species}[Organism]"
-        if start_length!= None or stop_length!= None:
-                search_term += f" {start_length}:{stop_length}[Sequence Length]"
+
+    search_term = f"{gene_name}[Gene Name] AND {species}[Organism]"
+    if start_length!= None or stop_length!= None:
+            search_term += f" {start_length}:{stop_length}[Sequence Length]"
+    handle = Entrez.esearch(db = "nucleotide", term = search_term, retmax = 1, idtype = "acc")
+    search_result = Entrez.read(handle)
+    handle.close()
+    id = search_result["IdList"]
+    if len(id)==0 and permissive_search:
+        search_term = f"{gene_name} {species} {start_length}:{stop_length}[Sequence Length]"
         handle = Entrez.esearch(db = "nucleotide", term = search_term, retmax = 1, idtype = "acc")
         search_result = Entrez.read(handle)
         handle.close()
         id = search_result["IdList"]
-        n=0
-        for i in id:
-            n+=1
-        if n==0 and permissive_search:
-            search_term = f"{gene_name} {species} {start_length}:{stop_length}[Sequence Length]"
-            handle = Entrez.esearch(db = "nucleotide", term = search_term, retmax = 1, idtype = "acc")
-            search_result = Entrez.read(handle)
-            handle.close()
-            id = search_result["IdList"]
-    n=0
-    for i in id:
-        n+=1
-    if n==1:
-        handle = Entrez.efetch(db="nucleotide", id=id, retmode = "fasta", rettype = "fasta")
-        sequence = handle.read()
-        handle.close()
-        with open(dst, mode="a") as writer:
-            writer.write(sequence)
\ No newline at end of file
+    if len(id)==1:
+        return next(download_sequence_from_id(id,dst))
+    else: 
+         raise ValueError("Your search did not return any results")