diff --git a/Project.toml b/Project.toml index 216ee17..f72c8dc 100644 --- a/Project.toml +++ b/Project.toml @@ -3,9 +3,6 @@ uuid = "c2308a5c-f048-11e8-3e8a-31650f418d12" authors = ["Sabrina J. Ward ", "Jakob N. Nissen "] version = "2.1.3" -[weakdeps] -BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" - [deps] Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b" BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea" @@ -14,12 +11,15 @@ PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" StringViews = "354b36f9-a18e-4713-926e-db85100087ba" TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +[weakdeps] +BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" + [extensions] BioSequencesExt = "BioSequences" [compat] Automa = "1" -BioGenerics = "0.1.2" +BioGenerics = "0.1.3" BioSequences = "3" PrecompileTools = "1" StringViews = "1" @@ -27,10 +27,11 @@ TranscodingStreams = "0.9.5" julia = "1.6" [extras] -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193" FormatSpecimens = "3372ea36-2a1a-11e9-3eb7-996970b6ffbd" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["BioSequences", "Random", "Test", "FormatSpecimens"] +test = ["BioSequences", "CodecZlib", "Random", "Test", "FormatSpecimens"] diff --git a/docs/Project.toml b/docs/Project.toml index a760fb8..30dbcbf 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,4 +1,5 @@ [deps] +BioGenerics = "47718e42-2ac5-11e9-14af-e5595289c2ea" BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" diff --git a/docs/src/files.md b/docs/src/files.md index d80651c..5b4f3cd 100644 --- a/docs/src/files.md +++ b/docs/src/files.md @@ -75,6 +75,8 @@ UInt8[] ``` To use it correctly, either call `flush`, or close the writer first (which also closes the underlying 
stream). + +### Readers and writers with do-syntax It is recommended to use readers and writers to `do` syntax in the form: ```jldoctest julia> FASTAWriter(open(tempname(), "w")) do writer @@ -95,6 +97,34 @@ julia> open(FASTAWriter, tempname()) do writer However, this latter syntax does not easily extend to different types of IO, such as gzip compressed streams. +However, this latter syntax does not easily extend to different types of IO, such as gzip compressed streams. + +### `rdr` and `wtr` macros +The `rdr` and `wtr` macros use the passed file name to determine the FASTX reader or writer to use - including any compression file extensions. +Since these macros rely on heuristics and are a little opaque to users, it is recommended to use them only for ephemeral REPL work, and not in packages, where the more explicit forms are preferred. + +The macro call `rdr"seqs.fna.gz"` expands to +```julia +FASTAReader(GzipDecompressorStream(open("seqs.fna.gz"; lock=false))) +``` + +Even though the reader (or writer) is already opened, you can still use the ordinary `open(x) do f` +pattern to automatically close the reader when done: + +```jldoctest +julia> using CodecZlib # for gzip files + +julia> open(rdr"../test/data/test.fasta") do reader + println(identifier(first(reader))) + end +abc + +julia> open(wtr"seqs.fna.gz") do writer + write(writer, FASTARecord("my_header", "TAGAG")) + end +17 +``` + ### Validate files The functions `validate_fasta` and `validate_fastq` can be used to check if an `IO` contains data that can be read as FASTX. diff --git a/docs/src/index.md b/docs/src/index.md index 719ae56..33c0614 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -75,6 +75,22 @@ julia> FASTQWriter(GzipCompressorStream(open(tempname(), "w"))) do writer 28 ``` +For added convenience, you can also use the reader and writer macros `rdr""` and `wtr""`.
+These macros use the file extensions to determine the biological sequence reader or writer type, and any file compression. +To use these macros with the `do`-syntax, you can use `open` as normal. Hence, the above code block can also be written in the following equivalent way: + +```jldoctest +julia> using CodecZlib + +julia> open(rdr"../test/data/seqs.fna.gz") do reader + for record in reader + println(identifier(record)) + end + end +seqa +seqb +``` + ### Construct FASTA or FASTQ records from raw parts ```jldoctest julia> fasta_record = FASTARecord("some header", dna"TAGAAGA"); diff --git a/src/FASTX.jl b/src/FASTX.jl index f539830..d084083 100644 --- a/src/FASTX.jl +++ b/src/FASTX.jl @@ -2,6 +2,7 @@ module FASTX using StringViews: StringView using Automa: Automa +using BioGenerics: BioGenerics, @rdr_str, @wtr_str """ identifier(record::Record)::AbstractString @@ -220,6 +221,11 @@ const FASTQReader = FASTQ.Reader const FASTAWriter = FASTA.Writer const FASTQWriter = FASTQ.Writer +const FASTA_EXTENSIONS = Union{Val{:fa}, Val{:fasta}, Val{:faa}, Val{:fna}} + +BioGenerics.readertype(::FASTA_EXTENSIONS, arg) = FASTAReader +BioGenerics.writertype(::FASTA_EXTENSIONS, arg) = FASTAWriter + if !isdefined(Base, :get_extension) include("../ext/BioSequencesExt.jl") end @@ -249,6 +255,10 @@ export faidx, index!, extract, - seekrecord + seekrecord, + + # Re-export from BioGenerics + @rdr_str, + @wtr_str end # module diff --git a/test/maintests.jl b/test/maintests.jl index d1e7429..2eba5d5 100644 --- a/test/maintests.jl +++ b/test/maintests.jl @@ -1,3 +1,5 @@ +using CodecZlib + # Common tests @testset "FASTX" begin @testset "Copying to LongSequence" begin @@ -133,4 +135,4 @@ @test sequence(fq) == "TAGJKKm" @test quality(fq) == "jjkkmmo" end -end \ No newline at end of file +end