From f6fe1daf91cd4b6ea7c0656fa18bb8d81e9e386a Mon Sep 17 00:00:00 2001 From: LCrossman Date: Wed, 18 Mar 2026 12:59:22 +0000 Subject: [PATCH 01/16] adding line for command to run python tests --- microbiorust-py/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/microbiorust-py/README.md b/microbiorust-py/README.md index 299c993..a4f7e80 100644 --- a/microbiorust-py/README.md +++ b/microbiorust-py/README.md @@ -12,6 +12,11 @@ pip install microbiorust ``` +to use the Python tests with pytest +```bash +python3 -m pytest -s tests/test_mbr.py +``` + Wheels are available for Linux, macOS and Windows (Python 3.10+). No Rust toolchain required. (no requirement to install Rust) @@ -32,6 +37,7 @@ To verify the Python module functions are correctly exposed from Rust: cargo test ``` + --- ## Features From 5ea68fb8f6fb9d8898e7d8d82f99840abb28888c Mon Sep 17 00:00:00 2001 From: LCrossman Date: Fri, 8 May 2026 16:27:18 +0100 Subject: [PATCH 02/16] Deleted test gbk which was invalid --- microbiorust-py/test_input.gbk | 51 ---------------------------------- 1 file changed, 51 deletions(-) delete mode 100644 microbiorust-py/test_input.gbk diff --git a/microbiorust-py/test_input.gbk b/microbiorust-py/test_input.gbk deleted file mode 100644 index d1e8825..0000000 --- a/microbiorust-py/test_input.gbk +++ /dev/null @@ -1,51 +0,0 @@ -LOCUS source_1 910 bp DNA linear CON 01-NOV-2024 -DEFINITION Escherichia coli K-12 substr. MG1655. -ACCESSION source_1 -KEYWORDS . -SOURCE Escherichia coli K-12 substr. MG1655 - ORGANISM Escherichia coli K-12 substr. MG1655 -FEATURES Location/Qualifiers - source 1..910 - /organism="K-12 substr. MG1655" - /mol_type="DNA" - /strain="K-12 substr. MG1655" - /type_material="type strain of Escherichia coli K12" - /db_xref="PRJNA57779" - gene complement(1..354) - /locus_tag="b3304" - CDS complement(1..354) - /locus_tag="b3304" - /codon_start=1 - /gene="rplR" - /translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGS - LVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQAL - DAAREAGLQ" - /product="50S ribosomal subunit protein L18" - gene complement(364..897) - /locus_tag="b3305" - CDS complement(364..897) - /locus_tag="b3305" - /codon_start=1 - /gene="rplF" - /translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKH - NTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLS - GFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYAD - VVRTKEAKK" - /product="50S ribosomal subunit protein L6" -ORIGIN - 1 TTAGAACTGA AGGCCAGCTT CACGGGCAGC ATCTGCCAGT GCCTGGACAC GACCATGATA - 61 TTGGAACCCG GAACGGTCAA AGGATACATC TTTGATGCCT TTTTCCAGAG CGCGTTCAGC - 121 GACAGCTTTA CCCACAGCTG CAGCCGCGTC TTTGTTACCG GTGTACTTCA GTTGTTCAGC - 181 GATAGCTTTT TCTACAGTAG AAGCAGCTAC CAGAACTTCA GAACCGTTCG GTGCAATTAC - 241 CTGTGCGTAA ATGTGACGCG GGGTACGATG TACCACCAGG CGAGTTGCGC CCAGCTCCTG - 301 GAGCTTGCGG CGTGCGCGGG TCGCACGACG GATACGAGCA GATTTCTTAT CCATAGTGTT - 361 ACCTTACTTC TTCTTAGCCT CTTTGGTACG CACGACTTCG TCGGCGTAAC GAACACCCTT - 421 GCCTTTATAA GGCTCAGGAC GACGGTAGGC GCGCAGATCC GCTGCAACCT GGCCGATCAC - 481 CTGCTTATCA GCGCCTTTCA GCACGATTTC AGTCTGAGTC GGACATTCAG CAGTGATACC - 541 CGCAGGCAGC TGATGGTCAA CAGGATGAGA GAAACCCAGA GACAGGTTAA TCACATTGCC - 601 TTTAACCGCT GCACGGTAAC CTACACCAAC CAGCTGCAGC TTCTTAGTGA AGCCTTCGGT - 661 AACACCGATA ACCATTGAGT TCAGCAGGGC ACGCGCGGTA CCAGCCTGTG CCCAACCGTC - 721 TGCGTAACCA TCACGCGGAC CGAAGGTCAG GGTATTATCT GCATGTTTAA CTTCAACAGC - 781 ATCGTTGAGA GTACGAGTCA GCTCGCCGTT TTTACCTTTG ATCGTAATAA CCTGACCGTT - 841 GATTTTTACG TCAACGCCGG CAGGAACAAC GACCGGTGCT TTAGCAACAC GAGACA -// From af964a7dd00258aab568ce2537326615d56be881 Mon Sep 17 00:00:00 2001 From: LCrossman Date: Mon, 1 Jun 2026 12:34:20 +0100 Subject: [PATCH 03/16] adding to .gitignore --- .gitignore | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ec4f674..194ad08 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,17 @@ # Generated by Cargo # will have compiled files and executables -debug/ target/ +dist/ +*.egg-info/ +__pycache__/ +*.pyc +debug/ + +*.so +*.abi3.so +*.dylib +*.pyd + expand.rs # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html @@ -24,3 +34,4 @@ env/ venv/ site/ static/ +*.gff From bdff597a2509dad5ca75923077e3a7e8218a6997 Mon Sep 17 00:00:00 2001 From: LCrossman Date: Mon, 1 Jun 2026 12:48:46 +0100 Subject: [PATCH 04/16] change of path --- microBioRust/Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/microBioRust/Cargo.toml b/microBioRust/Cargo.toml index 6238591..e4fa736 100644 --- a/microBioRust/Cargo.toml +++ b/microBioRust/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "microBioRust" -version = "0.1.3" -edition = "2021" +version = "0.1.4" +edition = "2024" license = "MIT" keywords = ["bioinformatics", "micro", "bio", "genomics", "sequence-analysis"] description = "Microbiology friendly bioinformatics Rust functions" @@ -27,7 +27,7 @@ path = "src/lib.rs" [[example]] name = "blast-example" -path = "examples/src/blast_parse.rs" +path = "examples/blast_parse.rs" [dependencies] clap = { version = "4.5.19", features = ["derive"] } From 4b0bde6e3c929016e261682adf1c7506d1999900 Mon Sep 17 00:00:00 2001 From: LCrossman Date: Mon, 1 Jun 2026 12:55:03 +0100 Subject: [PATCH 05/16] editing .gitignore --- microBioRust/Cargo.toml | 2 +- .../examples/{src => }/blast_parse.rs | 0 microBioRust/examples/convert_to_faa.rs | 34 ++++++++++++ microBioRust/src/embl.rs | 8 +-- microBioRust/src/gbk.rs | 8 +-- microBioRust/{ => tests}/K12_ribo.gbk | 0 microBioRust/{ => tests}/Rhiz3841.embl | 0 microBioRust/{ => tests}/Rhiz3841.gbk.gb | 0 microBioRust/tests/cleaned_dna.fasta | 6 +++ microBioRust/{ => tests}/example.embl | 0 microBioRust/tests/new_output_embl.gbk | 52 +++++++++++++++++++ 11 files changed, 101 insertions(+), 9 deletions(-) rename microBioRust/examples/{src => }/blast_parse.rs (100%) create mode 100644 microBioRust/examples/convert_to_faa.rs rename microBioRust/{ => tests}/K12_ribo.gbk (100%) rename microBioRust/{ => tests}/Rhiz3841.embl (100%) rename microBioRust/{ => tests}/Rhiz3841.gbk.gb (100%) create mode 100644 microBioRust/tests/cleaned_dna.fasta rename microBioRust/{ => tests}/example.embl (100%) create mode 100644 microBioRust/tests/new_output_embl.gbk diff --git a/microBioRust/Cargo.toml b/microBioRust/Cargo.toml index e4fa736..aef0abf 100644 --- a/microBioRust/Cargo.toml +++ b/microBioRust/Cargo.toml @@ -14,7 +14,7 @@ categories = [ ] readme = "README.md" authors = ["Lisa Crossman and microBioRust community"] -exclude = [".git", ".gitignore", ".dribble.example.embl", "tests/data/"] +exclude = [".git", ".gitignore", ".dribble.example.embl", "tests/data/", "src/bin","config.toml","K12_ribo.gbk","Rhiz3841.gbk.gb","Rhiz3841.gbk.gb_out.faa","rust_via_python_countgbk2faa.py","rust_via_python_gbk2faa.py","asv.conf.json","benchmarks/"] repository = "https://github.com/microBioRust/microBioRust" documentation = "https://microbiorust.github.io/docs/" diff --git a/microBioRust/examples/src/blast_parse.rs b/microBioRust/examples/blast_parse.rs similarity index 100% rename from microBioRust/examples/src/blast_parse.rs rename to microBioRust/examples/blast_parse.rs diff --git a/microBioRust/examples/convert_to_faa.rs b/microBioRust/examples/convert_to_faa.rs new file mode 100644 index 0000000..3d972a2 --- /dev/null +++ b/microBioRust/examples/convert_to_faa.rs @@ -0,0 +1,34 @@ + use clap::Parser; + use std::{ + fs::File, + io::{Write, BufWriter}, + }; + use microBioRust::{ + genbank, + }; + + #[derive(Parser, Debug)] + #[clap(author, version, about)] + struct Arguments { + #[clap(short, long)] + filename: String, + #[clap(short, long)] + output: String, + } + +fn main() -> Result<(), anyhow::Error> { + let args = Arguments::parse(); + let records = genbank!(&args.filename); + let file = File::create(&args.output)?; + let mut writer = BufWriter::new(file); + for record in records { + for (k, _v) in &record.cds.attributes { + if let Some(seq) = record.seq_features.get_sequence_faa(k) { + writeln!(writer, ">{}|{}\n{}", &record.id, &k, seq)?; + } + } + } + writer.flush()?; + Ok(()) +} + diff --git a/microBioRust/src/embl.rs b/microBioRust/src/embl.rs index ed571f1..58b09ec 100644 --- a/microBioRust/src/embl.rs +++ b/microBioRust/src/embl.rs @@ -263,14 +263,14 @@ //!``` //! -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use bio::alphabets::dna::revcomp; use chrono::prelude::*; use lazy_static::lazy_static; use paste::paste; use protein_translate::translate; -use serde::Serialize; use regex::Regex; +use serde::Serialize; use std::{ collections::{BTreeMap, HashSet}, convert::{AsRef, TryInto}, @@ -600,8 +600,8 @@ where //println!("designated codon start {:?} {:?}", &codon_start, &locus_tag); } if self.line_buffer.contains("/gene=") { - let gen: Vec<&str> = self.line_buffer.split('\"').collect(); - gene = gen[1].to_string(); + let genes: Vec<&str> = self.line_buffer.split('\"').collect(); + gene = genes[1].to_string(); //println!("gene designated {:?} {:?}", &gene, &locus_tag); } if self.line_buffer.contains("/product") { diff --git a/microBioRust/src/gbk.rs b/microBioRust/src/gbk.rs index e20b22c..abc0091 100644 --- a/microBioRust/src/gbk.rs +++ b/microBioRust/src/gbk.rs @@ -126,7 +126,7 @@ //! let mut read_counter: u32 = 0; //! let mut seq_region: BTreeMap = BTreeMap::new(); //! let mut record_vec: Vec = Vec::new(); -//! loop { +//! loop { //! match records.next() { //! Some(Ok(mut record)) => { //! println!("next record"); @@ -275,7 +275,7 @@ //!``` //! -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use bio::alphabets::dna::revcomp; use chrono::prelude::*; use itertools::Itertools; @@ -681,8 +681,8 @@ where //println!("designated codon start {:?} {:?}", &codon_start, &locus_tag); } if self.line_buffer.contains("/gene=") { - let gen: Vec<&str> = self.line_buffer.split('\"').collect(); - gene = gen[1].to_string(); + let genes: Vec<&str> = self.line_buffer.split('\"').collect(); + gene = genes[1].to_string(); //println!("gene designated {:?} {:?}", &gene, &locus_tag); } if self.line_buffer.contains("/product") { diff --git a/microBioRust/K12_ribo.gbk b/microBioRust/tests/K12_ribo.gbk similarity index 100% rename from microBioRust/K12_ribo.gbk rename to microBioRust/tests/K12_ribo.gbk diff --git a/microBioRust/Rhiz3841.embl b/microBioRust/tests/Rhiz3841.embl similarity index 100% rename from microBioRust/Rhiz3841.embl rename to microBioRust/tests/Rhiz3841.embl diff --git a/microBioRust/Rhiz3841.gbk.gb b/microBioRust/tests/Rhiz3841.gbk.gb similarity index 100% rename from microBioRust/Rhiz3841.gbk.gb rename to microBioRust/tests/Rhiz3841.gbk.gb diff --git a/microBioRust/tests/cleaned_dna.fasta b/microBioRust/tests/cleaned_dna.fasta new file mode 100644 index 0000000..53dfabd --- /dev/null +++ b/microBioRust/tests/cleaned_dna.fasta @@ -0,0 +1,6 @@ +>seq1 +ATGC-ATGCATGCATGC +>seq2 +ATGCAATGCTTGCATGC +>seq3 +TTGCAATCCATGCAAGC diff --git a/microBioRust/example.embl b/microBioRust/tests/example.embl similarity index 100% rename from microBioRust/example.embl rename to microBioRust/tests/example.embl diff --git a/microBioRust/tests/new_output_embl.gbk b/microBioRust/tests/new_output_embl.gbk new file mode 100644 index 0000000..0a27db7 --- /dev/null +++ b/microBioRust/tests/new_output_embl.gbk @@ -0,0 +1,52 @@ +LOCUS source_1 928 bp DNA linear CON 01-JUN-2026 +DEFINITION Escherichia coli K-12 substr. MG1655. +ACCESSION source_1 +KEYWORDS . +SOURCE Escherichia coli K-12 substr. MG1655 + ORGANISM Escherichia coli K-12 substr. MG1655 +FEATURES Location/Qualifiers + source 1..910 + /organism="K-12 substr. MG1655" + /mol_type="DNA" + /strain="K-12 substr. MG1655" + /db_xref="PRJNA57779" + gene complement(1..354) + /locus_tag="b3304" + CDS complement(1..354) + /locus_tag="b3304" + /codon_start="1" + /gene="rplR" + /translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGS + LVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQAL + DAAREAGLQ" + /product="50S ribosomal subunit protein L18" + gene complement(364..897) + /locus_tag="b3305" + CDS complement(364..897) + /locus_tag="b3305" + /codon_start="1" + /gene="rplF" + /translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKH + NTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLS + GFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYAD + VVRTKEAKK" + /product="50S ribosomal subunit protein L6" +ORIGIN + 1 acctctacct tagaactgaa ggccagcttc acgggcagca tctgccagtg cctggacacg + 61 accatgatat tggaacccgg aacggtcaaa ggatacatct ttgatgcctt tttccagagc + 121 gcgttcagcg acagctttac ccacagctgc agccgcgtct ttgttaccgg tgtacttcag + 181 ttgttcagcg atagcttttt ctacagtaga agcagctacc agaacttcag aaccgttcgg + 241 tgcaattacc tgtgcgtaaa tgtgacgcgg ggtacgatgt accaccaggc gagttgcgcc + 301 cagctcctgg agcttgcggc gtgcgcgggt cgcacgacgg atacgagcag atttcttatc + 361 catagtgtta ccttacttct tcttagcctc tttggtacgc acgacttcgt cggcgtaacg + 421 aacacccttg cctttataag gctcaggacg acggtaggcg cgcagatccg ctgcaacctg + 481 gccgatcacc tgcttatcag cgcctttcag cacgatttca gtctgagtcg gacattcagc + 541 agtgataccc gcaggcagct gatggtcaac aggatgagag aaacccagag acaggttaat + 601 cacattgcct ttaaccgctg cacggtaacc tacaccaacc agctgcagct tcttagtgaa + 661 gccttcggta acaccgataa ccattgagtt cagcagggca cgcgcggtac cagcctgtgc + 721 ccaaccgtct gcgtaaccat cacgcggacc gaaggtcagg gtattatctg catgtttaac + 781 ttcaacagca tcgttgagag tacgagtcag ctcgccgttt ttacctttga tcgtaataac + 841 ctgaccgttg atttttacgt caacgccggc aggaacaacg accggtgctt tagcaacacg + 901 agacattttt tcc + +// From 83694472adde21c72c205420f95d3d087b8d7abc Mon Sep 17 00:00:00 2001 From: LCrossman Date: Mon, 1 Jun 2026 13:06:15 +0100 Subject: [PATCH 06/16] fix file test paths after directory move --- microBioRust/cleaned_dna.fasta | 6 + microBioRust/new_output_embl.gbk | 260 +++++++++++++++++++++++++++ microBioRust/src/embl.rs | 10 +- microBioRust/src/gbk.rs | 10 +- microBioRust/tests/embl_to_faa.rs | 2 +- microBioRust/tests/embl_to_ffn.rs | 2 +- microBioRust/tests/embl_to_gff.rs | 4 +- microBioRust/tests/genbank_to_faa.rs | 2 +- microBioRust/tests/genbank_to_ffn.rs | 2 +- microBioRust/tests/genbank_to_gff.rs | 4 +- 10 files changed, 284 insertions(+), 18 deletions(-) create mode 100644 microBioRust/cleaned_dna.fasta create mode 100644 microBioRust/new_output_embl.gbk diff --git a/microBioRust/cleaned_dna.fasta b/microBioRust/cleaned_dna.fasta new file mode 100644 index 0000000..53dfabd --- /dev/null +++ b/microBioRust/cleaned_dna.fasta @@ -0,0 +1,6 @@ +>seq1 +ATGC-ATGCATGCATGC +>seq2 +ATGCAATGCTTGCATGC +>seq3 +TTGCAATCCATGCAAGC diff --git a/microBioRust/new_output_embl.gbk b/microBioRust/new_output_embl.gbk new file mode 100644 index 0000000..3182d68 --- /dev/null +++ b/microBioRust/new_output_embl.gbk @@ -0,0 +1,260 @@ +LOCUS source_1 928 bp DNA linear CON 01-JUN-2026 +DEFINITION Escherichia coli K-12 substr. MG1655. +ACCESSION source_1 +KEYWORDS . +SOURCE Escherichia coli K-12 substr. MG1655 + ORGANISM Escherichia coli K-12 substr. MG1655 +FEATURES Location/Qualifiers + source 1..910 + /organism="K-12 substr. MG1655" + /mol_type="DNA" + /strain="K-12 substr. MG1655" + /db_xref="PRJNA57779" + gene complement(1..354) + /locus_tag="b3304" + CDS complement(1..354) + /locus_tag="b3304" + /codon_start="1" + /gene="rplR" + /translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGS + LVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQAL + DAAREAGLQ" + /product="50S ribosomal subunit protein L18" + gene complement(364..897) + /locus_tag="b3305" + CDS complement(364..897) + /locus_tag="b3305" + /codon_start="1" + /gene="rplF" + /translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKH + NTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLS + GFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYAD + VVRTKEAKK" + /product="50S ribosomal subunit protein L6" +ORIGIN + 1 acctctacct tagaactgaa ggccagcttc acgggcagca tctgccagtg cctggacacg + 61 accatgatat tggaacccgg aacggtcaaa ggatacatct ttgatgcctt tttccagagc + 121 gcgttcagcg acagctttac ccacagctgc agccgcgtct ttgttaccgg tgtacttcag + 181 ttgttcagcg atagcttttt ctacagtaga agcagctacc agaacttcag aaccgttcgg + 241 tgcaattacc tgtgcgtaaa tgtgacgcgg ggtacgatgt accaccaggc gagttgcgcc + 301 cagctcctgg agcttgcggc gtgcgcgggt cgcacgacgg atacgagcag atttcttatc + 361 catagtgtta ccttacttct tcttagcctc tttggtacgc acgacttcgt cggcgtaacg + 421 aacacccttg cctttataag gctcaggacg acggtaggcg cgcagatccg ctgcaacctg + 481 gccgatcacc tgcttatcag cgcctttcag cacgatttca gtctgagtcg gacattcagc + 541 agtgataccc gcaggcagct gatggtcaac aggatgagag aaacccagag acaggttaat + 601 cacattgcct ttaaccgctg cacggtaacc tacaccaacc agctgcagct tcttagtgaa + 661 gccttcggta acaccgataa ccattgagtt cagcagggca cgcgcggtac cagcctgtgc + 721 ccaaccgtct gcgtaaccat cacgcggacc gaaggtcagg gtattatctg catgtttaac + 781 ttcaacagca tcgttgagag tacgagtcag ctcgccgttt ttacctttga tcgtaataac + 841 ctgaccgttg atttttacgt caacgccggc aggaacaacg accggtgctt tagcaacacg + 901 agacattttt tcc + +// +LOCUS source_1 928 bp DNA linear CON 01-JUN-2026 +DEFINITION Escherichia coli K-12 substr. MG1655. +ACCESSION source_1 +KEYWORDS . +SOURCE Escherichia coli K-12 substr. MG1655 + ORGANISM Escherichia coli K-12 substr. MG1655 +FEATURES Location/Qualifiers + source 1..910 + /organism="K-12 substr. MG1655" + /mol_type="DNA" + /strain="K-12 substr. MG1655" + /db_xref="PRJNA57779" + gene complement(1..354) + /locus_tag="b3304" + CDS complement(1..354) + /locus_tag="b3304" + /codon_start="1" + /gene="rplR" + /translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGS + LVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQAL + DAAREAGLQ" + /product="50S ribosomal subunit protein L18" + gene complement(364..897) + /locus_tag="b3305" + CDS complement(364..897) + /locus_tag="b3305" + /codon_start="1" + /gene="rplF" + /translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKH + NTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLS + GFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYAD + VVRTKEAKK" + /product="50S ribosomal subunit protein L6" +ORIGIN + 1 acctctacct tagaactgaa ggccagcttc acgggcagca tctgccagtg cctggacacg + 61 accatgatat tggaacccgg aacggtcaaa ggatacatct ttgatgcctt tttccagagc + 121 gcgttcagcg acagctttac ccacagctgc agccgcgtct ttgttaccgg tgtacttcag + 181 ttgttcagcg atagcttttt ctacagtaga agcagctacc agaacttcag aaccgttcgg + 241 tgcaattacc tgtgcgtaaa tgtgacgcgg ggtacgatgt accaccaggc gagttgcgcc + 301 cagctcctgg agcttgcggc gtgcgcgggt cgcacgacgg atacgagcag atttcttatc + 361 catagtgtta ccttacttct tcttagcctc tttggtacgc acgacttcgt cggcgtaacg + 421 aacacccttg cctttataag gctcaggacg acggtaggcg cgcagatccg ctgcaacctg + 481 gccgatcacc tgcttatcag cgcctttcag cacgatttca gtctgagtcg gacattcagc + 541 agtgataccc gcaggcagct gatggtcaac aggatgagag aaacccagag acaggttaat + 601 cacattgcct ttaaccgctg cacggtaacc tacaccaacc agctgcagct tcttagtgaa + 661 gccttcggta acaccgataa ccattgagtt cagcagggca cgcgcggtac cagcctgtgc + 721 ccaaccgtct gcgtaaccat cacgcggacc gaaggtcagg gtattatctg catgtttaac + 781 ttcaacagca tcgttgagag tacgagtcag ctcgccgttt ttacctttga tcgtaataac + 841 ctgaccgttg atttttacgt caacgccggc aggaacaacg accggtgctt tagcaacacg + 901 agacattttt tcc + +// +LOCUS source_1 928 bp DNA linear CON 01-JUN-2026 +DEFINITION Escherichia coli K-12 substr. MG1655. +ACCESSION source_1 +KEYWORDS . +SOURCE Escherichia coli K-12 substr. MG1655 + ORGANISM Escherichia coli K-12 substr. MG1655 +FEATURES Location/Qualifiers + source 1..910 + /organism="K-12 substr. MG1655" + /mol_type="DNA" + /strain="K-12 substr. MG1655" + /db_xref="PRJNA57779" + gene complement(1..354) + /locus_tag="b3304" + CDS complement(1..354) + /locus_tag="b3304" + /codon_start="1" + /gene="rplR" + /translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGS + LVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQAL + DAAREAGLQ" + /product="50S ribosomal subunit protein L18" + gene complement(364..897) + /locus_tag="b3305" + CDS complement(364..897) + /locus_tag="b3305" + /codon_start="1" + /gene="rplF" + /translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKH + NTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLS + GFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYAD + VVRTKEAKK" + /product="50S ribosomal subunit protein L6" +ORIGIN + 1 acctctacct tagaactgaa ggccagcttc acgggcagca tctgccagtg cctggacacg + 61 accatgatat tggaacccgg aacggtcaaa ggatacatct ttgatgcctt tttccagagc + 121 gcgttcagcg acagctttac ccacagctgc agccgcgtct ttgttaccgg tgtacttcag + 181 ttgttcagcg atagcttttt ctacagtaga agcagctacc agaacttcag aaccgttcgg + 241 tgcaattacc tgtgcgtaaa tgtgacgcgg ggtacgatgt accaccaggc gagttgcgcc + 301 cagctcctgg agcttgcggc gtgcgcgggt cgcacgacgg atacgagcag atttcttatc + 361 catagtgtta ccttacttct tcttagcctc tttggtacgc acgacttcgt cggcgtaacg + 421 aacacccttg cctttataag gctcaggacg acggtaggcg cgcagatccg ctgcaacctg + 481 gccgatcacc tgcttatcag cgcctttcag cacgatttca gtctgagtcg gacattcagc + 541 agtgataccc gcaggcagct gatggtcaac aggatgagag aaacccagag acaggttaat + 601 cacattgcct ttaaccgctg cacggtaacc tacaccaacc agctgcagct tcttagtgaa + 661 gccttcggta acaccgataa ccattgagtt cagcagggca cgcgcggtac cagcctgtgc + 721 ccaaccgtct gcgtaaccat cacgcggacc gaaggtcagg gtattatctg catgtttaac + 781 ttcaacagca tcgttgagag tacgagtcag ctcgccgttt ttacctttga tcgtaataac + 841 ctgaccgttg atttttacgt caacgccggc aggaacaacg accggtgctt tagcaacacg + 901 agacattttt tcc + +// +LOCUS source_1 928 bp DNA linear CON 01-JUN-2026 +DEFINITION Escherichia coli K-12 substr. MG1655. +ACCESSION source_1 +KEYWORDS . +SOURCE Escherichia coli K-12 substr. MG1655 + ORGANISM Escherichia coli K-12 substr. MG1655 +FEATURES Location/Qualifiers + source 1..910 + /organism="K-12 substr. MG1655" + /mol_type="DNA" + /strain="K-12 substr. MG1655" + /db_xref="PRJNA57779" + gene complement(1..354) + /locus_tag="b3304" + CDS complement(1..354) + /locus_tag="b3304" + /codon_start="1" + /gene="rplR" + /translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGS + LVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQAL + DAAREAGLQ" + /product="50S ribosomal subunit protein L18" + gene complement(364..897) + /locus_tag="b3305" + CDS complement(364..897) + /locus_tag="b3305" + /codon_start="1" + /gene="rplF" + /translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKH + NTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLS + GFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYAD + VVRTKEAKK" + /product="50S ribosomal subunit protein L6" +ORIGIN + 1 acctctacct tagaactgaa ggccagcttc acgggcagca tctgccagtg cctggacacg + 61 accatgatat tggaacccgg aacggtcaaa ggatacatct ttgatgcctt tttccagagc + 121 gcgttcagcg acagctttac ccacagctgc agccgcgtct ttgttaccgg tgtacttcag + 181 ttgttcagcg atagcttttt ctacagtaga agcagctacc agaacttcag aaccgttcgg + 241 tgcaattacc tgtgcgtaaa tgtgacgcgg ggtacgatgt accaccaggc gagttgcgcc + 301 cagctcctgg agcttgcggc gtgcgcgggt cgcacgacgg atacgagcag atttcttatc + 361 catagtgtta ccttacttct tcttagcctc tttggtacgc acgacttcgt cggcgtaacg + 421 aacacccttg cctttataag gctcaggacg acggtaggcg cgcagatccg ctgcaacctg + 481 gccgatcacc tgcttatcag cgcctttcag cacgatttca gtctgagtcg gacattcagc + 541 agtgataccc gcaggcagct gatggtcaac aggatgagag aaacccagag acaggttaat + 601 cacattgcct ttaaccgctg cacggtaacc tacaccaacc agctgcagct tcttagtgaa + 661 gccttcggta acaccgataa ccattgagtt cagcagggca cgcgcggtac cagcctgtgc + 721 ccaaccgtct gcgtaaccat cacgcggacc gaaggtcagg gtattatctg catgtttaac + 781 ttcaacagca tcgttgagag tacgagtcag ctcgccgttt ttacctttga tcgtaataac + 841 ctgaccgttg atttttacgt caacgccggc aggaacaacg accggtgctt tagcaacacg + 901 agacattttt tcc + +// +LOCUS source_1 928 bp DNA linear CON 01-JUN-2026 +DEFINITION Escherichia coli K-12 substr. MG1655. +ACCESSION source_1 +KEYWORDS . +SOURCE Escherichia coli K-12 substr. MG1655 + ORGANISM Escherichia coli K-12 substr. MG1655 +FEATURES Location/Qualifiers + source 1..910 + /organism="K-12 substr. MG1655" + /mol_type="DNA" + /strain="K-12 substr. MG1655" + /db_xref="PRJNA57779" + gene complement(1..354) + /locus_tag="b3304" + CDS complement(1..354) + /locus_tag="b3304" + /codon_start="1" + /gene="rplR" + /translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGS + LVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQAL + DAAREAGLQ" + /product="50S ribosomal subunit protein L18" + gene complement(364..897) + /locus_tag="b3305" + CDS complement(364..897) + /locus_tag="b3305" + /codon_start="1" + /gene="rplF" + /translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKH + NTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLS + GFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYAD + VVRTKEAKK" + /product="50S ribosomal subunit protein L6" +ORIGIN + 1 acctctacct tagaactgaa ggccagcttc acgggcagca tctgccagtg cctggacacg + 61 accatgatat tggaacccgg aacggtcaaa ggatacatct ttgatgcctt tttccagagc + 121 gcgttcagcg acagctttac ccacagctgc agccgcgtct ttgttaccgg tgtacttcag + 181 ttgttcagcg atagcttttt ctacagtaga agcagctacc agaacttcag aaccgttcgg + 241 tgcaattacc tgtgcgtaaa tgtgacgcgg ggtacgatgt accaccaggc gagttgcgcc + 301 cagctcctgg agcttgcggc gtgcgcgggt cgcacgacgg atacgagcag atttcttatc + 361 catagtgtta ccttacttct tcttagcctc tttggtacgc acgacttcgt cggcgtaacg + 421 aacacccttg cctttataag gctcaggacg acggtaggcg cgcagatccg ctgcaacctg + 481 gccgatcacc tgcttatcag cgcctttcag cacgatttca gtctgagtcg gacattcagc + 541 agtgataccc gcaggcagct gatggtcaac aggatgagag aaacccagag acaggttaat + 601 cacattgcct ttaaccgctg cacggtaacc tacaccaacc agctgcagct tcttagtgaa + 661 gccttcggta acaccgataa ccattgagtt cagcagggca cgcgcggtac cagcctgtgc + 721 ccaaccgtct gcgtaaccat cacgcggacc gaaggtcagg gtattatctg catgtttaac + 781 ttcaacagca tcgttgagag tacgagtcag ctcgccgttt ttacctttga tcgtaataac + 841 ctgaccgttg atttttacgt caacgccggc aggaacaacg accggtgctt tagcaacacg + 901 agacattttt tcc + +// diff --git a/microBioRust/src/embl.rs b/microBioRust/src/embl.rs index 58b09ec..9b3d666 100644 --- a/microBioRust/src/embl.rs +++ b/microBioRust/src/embl.rs @@ -1542,7 +1542,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_read_file() { - let content = std::fs::read_to_string("example.embl").expect("error reading file"); + let content = std::fs::read_to_string("tests/example.embl").expect("error reading file"); assert!(content.contains("ID")); assert!(content.len() > 0); } @@ -1553,7 +1553,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_embl() { - let file_embl = "example.embl"; + let file_embl = "tests/example.embl"; let records = embl!(&file_embl); assert!(records.len() > 0); } @@ -1564,7 +1564,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_source_attributes() { - let file_embl = "example.embl"; + let file_embl = "tests/example.embl"; let records = embl!(&file_embl); if let Some(record) = records.first() { if let Some((key, val)) = record.source_map.source_attributes.first_key_value() { @@ -1579,7 +1579,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_cds_attributes() { - let file_embl = "example.embl"; + let file_embl = "tests/example.embl"; let records = embl!(&file_embl); if let Some(record) = records.first() { if let Some((locus_tag, vals)) = record.cds.attributes.first_key_value() { @@ -1598,7 +1598,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_sequence_attributes() { - let file_embl = "example.embl"; + let file_embl = "tests/example.embl"; let records = embl!(&file_embl); if let Some(record) = records.first() { if let Some((key, vals)) = record.cds.attributes.first_key_value() { diff --git a/microBioRust/src/gbk.rs b/microBioRust/src/gbk.rs index abc0091..186f427 100644 --- a/microBioRust/src/gbk.rs +++ b/microBioRust/src/gbk.rs @@ -1732,7 +1732,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_read_file() { - let content = std::fs::read_to_string("K12_ribo.gbk").expect("error reading file"); + let content = std::fs::read_to_string("tests/K12_ribo.gbk").expect("error reading file"); assert!(content.contains("LOCUS")); assert!(content.len() > 0); } @@ -1743,7 +1743,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_gbk() { - let file_gbk = "K12_ribo.gbk"; + let file_gbk = "tests/K12_ribo.gbk"; let records = genbank!(&file_gbk); assert!(records.len() > 0); } @@ -1754,7 +1754,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_source_attributes() { - let file_gbk = "K12_ribo.gbk"; + let file_gbk = "tests/K12_ribo.gbk"; let records = genbank!(&file_gbk); if let Some(record) = records.first() { if let Some((key, val)) = record.source_map.source_attributes.first_key_value() { @@ -1769,7 +1769,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_cds_attributes() { - let file_gbk = "K12_ribo.gbk"; + let file_gbk = "tests/K12_ribo.gbk"; let records = genbank!(&file_gbk); if let Some(record) = records.first() { if let Some((locus_tag, vals)) = record.cds.attributes.first_key_value() { @@ -1788,7 +1788,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_sequence_attributes() { - let file_gbk = "K12_ribo.gbk"; + let file_gbk = "tests/K12_ribo.gbk"; let records = genbank!(&file_gbk); if let Some(record) = records.first() { if let Some((key, vals)) = record.cds.attributes.first_key_value() { diff --git a/microBioRust/tests/embl_to_faa.rs b/microBioRust/tests/embl_to_faa.rs index 86933c7..e44fe32 100644 --- a/microBioRust/tests/embl_to_faa.rs +++ b/microBioRust/tests/embl_to_faa.rs @@ -2,7 +2,7 @@ use microBioRust::embl::Reader; use std::fs; #[test] fn embl_to_faa() -> Result<(), anyhow::Error> { - let file_embl = fs::File::open("example.embl")?; + let file_embl = fs::File::open("tests/example.embl")?; let reader = Reader::new(file_embl); let mut records = reader.records(); let mut read_counter: u32 = 0; diff --git a/microBioRust/tests/embl_to_ffn.rs b/microBioRust/tests/embl_to_ffn.rs index 70a6f7a..7486ca3 100644 --- a/microBioRust/tests/embl_to_ffn.rs +++ b/microBioRust/tests/embl_to_ffn.rs @@ -2,7 +2,7 @@ use microBioRust::embl::Reader; use std::fs; #[test] pub fn embl_to_ffn() -> Result<(), anyhow::Error> { - let file_embl = fs::File::open("example.embl")?; + let file_embl = fs::File::open("tests/example.embl")?; let reader = Reader::new(file_embl); let mut records = reader.records(); let mut read_counter: u32 = 0; diff --git a/microBioRust/tests/embl_to_gff.rs b/microBioRust/tests/embl_to_gff.rs index 099cf6a..d385f8a 100644 --- a/microBioRust/tests/embl_to_gff.rs +++ b/microBioRust/tests/embl_to_gff.rs @@ -1,10 +1,10 @@ -use microBioRust::embl::{gff_write, Reader, Record}; +use microBioRust::embl::{Reader, Record, gff_write}; use std::collections::BTreeMap; use std::fs; #[test] fn test_embl_to_gff() -> std::io::Result<()> { - let file_embl = fs::File::open("example.embl")?; + let file_embl = fs::File::open("tests/example.embl")?; let reader = Reader::new(file_embl); let mut records = reader.records(); let mut read_counter: u32 = 0; diff --git a/microBioRust/tests/genbank_to_faa.rs b/microBioRust/tests/genbank_to_faa.rs index 2dbee2e..5eb8197 100644 --- a/microBioRust/tests/genbank_to_faa.rs +++ b/microBioRust/tests/genbank_to_faa.rs @@ -2,7 +2,7 @@ use microBioRust::gbk::Reader; use std::fs; #[test] pub fn genbank_to_faa() -> Result<(), anyhow::Error> { - let file_gbk = fs::File::open("K12_ribo.gbk")?; + let file_gbk = fs::File::open("tests/K12_ribo.gbk")?; let reader = Reader::new(file_gbk); let mut records = reader.records(); let mut read_counter: u32 = 0; diff --git a/microBioRust/tests/genbank_to_ffn.rs b/microBioRust/tests/genbank_to_ffn.rs index 7ba93aa..3a285d2 100644 --- a/microBioRust/tests/genbank_to_ffn.rs +++ b/microBioRust/tests/genbank_to_ffn.rs @@ -2,7 +2,7 @@ use microBioRust::gbk::Reader; use std::fs; #[test] pub fn genbank_to_ffn() -> Result<(), anyhow::Error> { - let file_gbk = fs::File::open("K12_ribo.gbk")?; + let file_gbk = fs::File::open("tests/K12_ribo.gbk")?; let reader = Reader::new(file_gbk); let mut records = reader.records(); let mut read_counter: u32 = 0; diff --git a/microBioRust/tests/genbank_to_gff.rs b/microBioRust/tests/genbank_to_gff.rs index 9274ff9..5f860bb 100644 --- a/microBioRust/tests/genbank_to_gff.rs +++ b/microBioRust/tests/genbank_to_gff.rs @@ -1,10 +1,10 @@ -use microBioRust::gbk::{gff_write, Reader, Record}; +use microBioRust::gbk::{Reader, Record, gff_write}; use std::collections::BTreeMap; use std::fs; use std::io; #[test] pub fn genbank_to_gff() -> io::Result<()> { - let file_gbk = fs::File::open("K12_ribo.gbk")?; + let file_gbk = fs::File::open("tests/K12_ribo.gbk")?; let _prev_start: u32 = 0; let mut prev_end: u32 = 0; let reader = Reader::new(file_gbk); From 1896425d39f87fed289da23aeafe515c76fc57d9 Mon Sep 17 00:00:00 2001 From: LCrossman Date: Mon, 1 Jun 2026 13:11:41 +0100 Subject: [PATCH 07/16] adding an example --- microBioRust/Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/microBioRust/Cargo.toml b/microBioRust/Cargo.toml index aef0abf..0be129e 100644 --- a/microBioRust/Cargo.toml +++ b/microBioRust/Cargo.toml @@ -29,6 +29,10 @@ path = "src/lib.rs" name = "blast-example" path = "examples/blast_parse.rs" +[[example]] +name = "convert-to-faa" +path = "examples/convert_to_faa.rs" + [dependencies] clap = { version = "4.5.19", features = ["derive"] } paste = "1.0" From 0e0da0ff19a24901103557502aba2cb832685805 Mon Sep 17 00:00:00 2001 From: LCrossman Date: Mon, 1 Jun 2026 13:27:03 +0100 Subject: [PATCH 08/16] removing test generated file, tidying imports for example --- microBioRust/examples/blast_parse.rs | 24 +-- microBioRust/new_output_embl.gbk | 260 --------------------------- 2 files changed, 13 insertions(+), 271 deletions(-) delete mode 100644 microBioRust/new_output_embl.gbk diff --git a/microBioRust/examples/blast_parse.rs b/microBioRust/examples/blast_parse.rs index 9daded5..3b875a7 100644 --- a/microBioRust/examples/blast_parse.rs +++ b/microBioRust/examples/blast_parse.rs @@ -1,17 +1,15 @@ -use anyhow::{Context, Result}; -use async_compression::tokio::bufread::GzipDecoder as AsyncGzDecoder; +use anyhow::Result; use clap::Parser; -use quick_xml::events::Event; -use quick_xml::reader::Reader; -use quick_xml::escape::unescape; -use serde::Serialize; -use serde_json::ser::Serializer as JsonSerializer; use microBioRust::blast::*; -use std::io::Cursor; -use tokio::io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncWriteExt, BufReader}; +use tokio::io::AsyncWriteExt; #[derive(Parser, Debug)] -#[command(name = "blast-parsers", author, version, about = "async microBioRust BLAST parsers: for outfmt6 (single line tabular) and outfmt5 (xml)")] +#[command( + name = "blast-parsers", + author, + version, + about = "async microBioRust BLAST parsers: for outfmt6 (single line tabular) and outfmt5 (xml)" +)] struct Cli { ///Use .gz for gzip-compressed files. #[arg(short, long, default_value = "-")] @@ -44,7 +42,11 @@ async fn main() -> Result<()> { buf.push(b'\n'); tokio::io::stdout().write_all(&buf).await?; } else { - println!("query {:?} hits {}", iter_rec.query_def, iter_rec.hits.len()); + println!( + "query {:?} hits {}", + iter_rec.query_def, + iter_rec.hits.len() + ); } } Err(e) => eprintln!("xml parse error: {}", e), diff --git a/microBioRust/new_output_embl.gbk b/microBioRust/new_output_embl.gbk deleted file mode 100644 index 3182d68..0000000 --- a/microBioRust/new_output_embl.gbk +++ /dev/null @@ -1,260 +0,0 @@ -LOCUS source_1 928 bp DNA linear CON 01-JUN-2026 -DEFINITION Escherichia coli K-12 substr. MG1655. -ACCESSION source_1 -KEYWORDS . -SOURCE Escherichia coli K-12 substr. MG1655 - ORGANISM Escherichia coli K-12 substr. MG1655 -FEATURES Location/Qualifiers - source 1..910 - /organism="K-12 substr. MG1655" - /mol_type="DNA" - /strain="K-12 substr. MG1655" - /db_xref="PRJNA57779" - gene complement(1..354) - /locus_tag="b3304" - CDS complement(1..354) - /locus_tag="b3304" - /codon_start="1" - /gene="rplR" - /translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGS - LVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQAL - DAAREAGLQ" - /product="50S ribosomal subunit protein L18" - gene complement(364..897) - /locus_tag="b3305" - CDS complement(364..897) - /locus_tag="b3305" - /codon_start="1" - /gene="rplF" - /translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKH - NTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLS - GFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYAD - VVRTKEAKK" - /product="50S ribosomal subunit protein L6" -ORIGIN - 1 acctctacct tagaactgaa ggccagcttc acgggcagca tctgccagtg cctggacacg - 61 accatgatat tggaacccgg aacggtcaaa ggatacatct ttgatgcctt tttccagagc - 121 gcgttcagcg acagctttac ccacagctgc agccgcgtct ttgttaccgg tgtacttcag - 181 ttgttcagcg atagcttttt ctacagtaga agcagctacc agaacttcag aaccgttcgg - 241 tgcaattacc tgtgcgtaaa tgtgacgcgg ggtacgatgt accaccaggc gagttgcgcc - 301 cagctcctgg agcttgcggc gtgcgcgggt cgcacgacgg atacgagcag atttcttatc - 361 catagtgtta ccttacttct tcttagcctc tttggtacgc acgacttcgt cggcgtaacg - 421 aacacccttg cctttataag gctcaggacg acggtaggcg cgcagatccg ctgcaacctg - 481 gccgatcacc tgcttatcag cgcctttcag cacgatttca gtctgagtcg gacattcagc - 541 agtgataccc gcaggcagct gatggtcaac aggatgagag aaacccagag acaggttaat - 601 cacattgcct ttaaccgctg cacggtaacc tacaccaacc agctgcagct tcttagtgaa - 661 gccttcggta acaccgataa ccattgagtt cagcagggca cgcgcggtac cagcctgtgc - 721 ccaaccgtct gcgtaaccat cacgcggacc gaaggtcagg gtattatctg catgtttaac - 781 ttcaacagca tcgttgagag tacgagtcag ctcgccgttt ttacctttga tcgtaataac - 841 ctgaccgttg atttttacgt caacgccggc aggaacaacg accggtgctt tagcaacacg - 901 agacattttt tcc - -// -LOCUS source_1 928 bp DNA linear CON 01-JUN-2026 -DEFINITION Escherichia coli K-12 substr. MG1655. -ACCESSION source_1 -KEYWORDS . -SOURCE Escherichia coli K-12 substr. MG1655 - ORGANISM Escherichia coli K-12 substr. MG1655 -FEATURES Location/Qualifiers - source 1..910 - /organism="K-12 substr. MG1655" - /mol_type="DNA" - /strain="K-12 substr. MG1655" - /db_xref="PRJNA57779" - gene complement(1..354) - /locus_tag="b3304" - CDS complement(1..354) - /locus_tag="b3304" - /codon_start="1" - /gene="rplR" - /translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGS - LVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQAL - DAAREAGLQ" - /product="50S ribosomal subunit protein L18" - gene complement(364..897) - /locus_tag="b3305" - CDS complement(364..897) - /locus_tag="b3305" - /codon_start="1" - /gene="rplF" - /translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKH - NTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLS - GFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYAD - VVRTKEAKK" - /product="50S ribosomal subunit protein L6" -ORIGIN - 1 acctctacct tagaactgaa ggccagcttc acgggcagca tctgccagtg cctggacacg - 61 accatgatat tggaacccgg aacggtcaaa ggatacatct ttgatgcctt tttccagagc - 121 gcgttcagcg acagctttac ccacagctgc agccgcgtct ttgttaccgg tgtacttcag - 181 ttgttcagcg atagcttttt ctacagtaga agcagctacc agaacttcag aaccgttcgg - 241 tgcaattacc tgtgcgtaaa tgtgacgcgg ggtacgatgt accaccaggc gagttgcgcc - 301 cagctcctgg agcttgcggc gtgcgcgggt cgcacgacgg atacgagcag atttcttatc - 361 catagtgtta ccttacttct tcttagcctc tttggtacgc acgacttcgt cggcgtaacg - 421 aacacccttg cctttataag gctcaggacg acggtaggcg cgcagatccg ctgcaacctg - 481 gccgatcacc tgcttatcag cgcctttcag cacgatttca gtctgagtcg gacattcagc - 541 agtgataccc gcaggcagct gatggtcaac aggatgagag aaacccagag acaggttaat - 601 cacattgcct ttaaccgctg cacggtaacc tacaccaacc agctgcagct tcttagtgaa - 661 gccttcggta acaccgataa ccattgagtt cagcagggca cgcgcggtac cagcctgtgc - 721 ccaaccgtct gcgtaaccat cacgcggacc gaaggtcagg gtattatctg catgtttaac - 781 ttcaacagca tcgttgagag tacgagtcag ctcgccgttt ttacctttga tcgtaataac - 841 ctgaccgttg atttttacgt caacgccggc aggaacaacg accggtgctt tagcaacacg - 901 agacattttt tcc - -// -LOCUS source_1 928 bp DNA linear CON 01-JUN-2026 -DEFINITION Escherichia coli K-12 substr. MG1655. -ACCESSION source_1 -KEYWORDS . -SOURCE Escherichia coli K-12 substr. MG1655 - ORGANISM Escherichia coli K-12 substr. MG1655 -FEATURES Location/Qualifiers - source 1..910 - /organism="K-12 substr. MG1655" - /mol_type="DNA" - /strain="K-12 substr. MG1655" - /db_xref="PRJNA57779" - gene complement(1..354) - /locus_tag="b3304" - CDS complement(1..354) - /locus_tag="b3304" - /codon_start="1" - /gene="rplR" - /translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGS - LVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQAL - DAAREAGLQ" - /product="50S ribosomal subunit protein L18" - gene complement(364..897) - /locus_tag="b3305" - CDS complement(364..897) - /locus_tag="b3305" - /codon_start="1" - /gene="rplF" - /translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKH - NTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLS - GFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYAD - VVRTKEAKK" - /product="50S ribosomal subunit protein L6" -ORIGIN - 1 acctctacct tagaactgaa ggccagcttc acgggcagca tctgccagtg cctggacacg - 61 accatgatat tggaacccgg aacggtcaaa ggatacatct ttgatgcctt tttccagagc - 121 gcgttcagcg acagctttac ccacagctgc agccgcgtct ttgttaccgg tgtacttcag - 181 ttgttcagcg atagcttttt ctacagtaga agcagctacc agaacttcag aaccgttcgg - 241 tgcaattacc tgtgcgtaaa tgtgacgcgg ggtacgatgt accaccaggc gagttgcgcc - 301 cagctcctgg agcttgcggc gtgcgcgggt cgcacgacgg atacgagcag atttcttatc - 361 catagtgtta ccttacttct tcttagcctc tttggtacgc acgacttcgt cggcgtaacg - 421 aacacccttg cctttataag gctcaggacg acggtaggcg cgcagatccg ctgcaacctg - 481 gccgatcacc tgcttatcag cgcctttcag cacgatttca gtctgagtcg gacattcagc - 541 agtgataccc gcaggcagct gatggtcaac aggatgagag aaacccagag acaggttaat - 601 cacattgcct ttaaccgctg cacggtaacc tacaccaacc agctgcagct tcttagtgaa - 661 gccttcggta acaccgataa ccattgagtt cagcagggca cgcgcggtac cagcctgtgc - 721 ccaaccgtct gcgtaaccat cacgcggacc gaaggtcagg gtattatctg catgtttaac - 781 ttcaacagca tcgttgagag tacgagtcag ctcgccgttt ttacctttga tcgtaataac - 841 ctgaccgttg atttttacgt caacgccggc aggaacaacg accggtgctt tagcaacacg - 901 agacattttt tcc - -// -LOCUS source_1 928 bp DNA linear CON 01-JUN-2026 -DEFINITION Escherichia coli K-12 substr. MG1655. -ACCESSION source_1 -KEYWORDS . -SOURCE Escherichia coli K-12 substr. MG1655 - ORGANISM Escherichia coli K-12 substr. MG1655 -FEATURES Location/Qualifiers - source 1..910 - /organism="K-12 substr. MG1655" - /mol_type="DNA" - /strain="K-12 substr. MG1655" - /db_xref="PRJNA57779" - gene complement(1..354) - /locus_tag="b3304" - CDS complement(1..354) - /locus_tag="b3304" - /codon_start="1" - /gene="rplR" - /translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGS - LVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQAL - DAAREAGLQ" - /product="50S ribosomal subunit protein L18" - gene complement(364..897) - /locus_tag="b3305" - CDS complement(364..897) - /locus_tag="b3305" - /codon_start="1" - /gene="rplF" - /translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKH - NTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLS - GFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYAD - VVRTKEAKK" - /product="50S ribosomal subunit protein L6" -ORIGIN - 1 acctctacct tagaactgaa ggccagcttc acgggcagca tctgccagtg cctggacacg - 61 accatgatat tggaacccgg aacggtcaaa ggatacatct ttgatgcctt tttccagagc - 121 gcgttcagcg acagctttac ccacagctgc agccgcgtct ttgttaccgg tgtacttcag - 181 ttgttcagcg atagcttttt ctacagtaga agcagctacc agaacttcag aaccgttcgg - 241 tgcaattacc tgtgcgtaaa tgtgacgcgg ggtacgatgt accaccaggc gagttgcgcc - 301 cagctcctgg agcttgcggc gtgcgcgggt cgcacgacgg atacgagcag atttcttatc - 361 catagtgtta ccttacttct tcttagcctc tttggtacgc acgacttcgt cggcgtaacg - 421 aacacccttg cctttataag gctcaggacg acggtaggcg cgcagatccg ctgcaacctg - 481 gccgatcacc tgcttatcag cgcctttcag cacgatttca gtctgagtcg gacattcagc - 541 agtgataccc gcaggcagct gatggtcaac aggatgagag aaacccagag acaggttaat - 601 cacattgcct ttaaccgctg cacggtaacc tacaccaacc agctgcagct tcttagtgaa - 661 gccttcggta acaccgataa ccattgagtt cagcagggca cgcgcggtac cagcctgtgc - 721 ccaaccgtct gcgtaaccat cacgcggacc gaaggtcagg gtattatctg catgtttaac - 781 ttcaacagca tcgttgagag tacgagtcag ctcgccgttt ttacctttga tcgtaataac - 841 ctgaccgttg atttttacgt caacgccggc aggaacaacg accggtgctt tagcaacacg - 901 agacattttt tcc - -// -LOCUS source_1 928 bp DNA linear CON 01-JUN-2026 -DEFINITION Escherichia coli K-12 substr. MG1655. -ACCESSION source_1 -KEYWORDS . -SOURCE Escherichia coli K-12 substr. MG1655 - ORGANISM Escherichia coli K-12 substr. MG1655 -FEATURES Location/Qualifiers - source 1..910 - /organism="K-12 substr. MG1655" - /mol_type="DNA" - /strain="K-12 substr. MG1655" - /db_xref="PRJNA57779" - gene complement(1..354) - /locus_tag="b3304" - CDS complement(1..354) - /locus_tag="b3304" - /codon_start="1" - /gene="rplR" - /translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGS - LVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQAL - DAAREAGLQ" - /product="50S ribosomal subunit protein L18" - gene complement(364..897) - /locus_tag="b3305" - CDS complement(364..897) - /locus_tag="b3305" - /codon_start="1" - /gene="rplF" - /translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKH - NTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLS - GFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYAD - VVRTKEAKK" - /product="50S ribosomal subunit protein L6" -ORIGIN - 1 acctctacct tagaactgaa ggccagcttc acgggcagca tctgccagtg cctggacacg - 61 accatgatat tggaacccgg aacggtcaaa ggatacatct ttgatgcctt tttccagagc - 121 gcgttcagcg acagctttac ccacagctgc agccgcgtct ttgttaccgg tgtacttcag - 181 ttgttcagcg atagcttttt ctacagtaga agcagctacc agaacttcag aaccgttcgg - 241 tgcaattacc tgtgcgtaaa tgtgacgcgg ggtacgatgt accaccaggc gagttgcgcc - 301 cagctcctgg agcttgcggc gtgcgcgggt cgcacgacgg atacgagcag atttcttatc - 361 catagtgtta ccttacttct tcttagcctc tttggtacgc acgacttcgt cggcgtaacg - 421 aacacccttg cctttataag gctcaggacg acggtaggcg cgcagatccg ctgcaacctg - 481 gccgatcacc tgcttatcag cgcctttcag cacgatttca gtctgagtcg gacattcagc - 541 agtgataccc gcaggcagct gatggtcaac aggatgagag aaacccagag acaggttaat - 601 cacattgcct ttaaccgctg cacggtaacc tacaccaacc agctgcagct tcttagtgaa - 661 gccttcggta acaccgataa ccattgagtt cagcagggca cgcgcggtac cagcctgtgc - 721 ccaaccgtct gcgtaaccat cacgcggacc gaaggtcagg gtattatctg catgtttaac - 781 ttcaacagca tcgttgagag tacgagtcag ctcgccgttt ttacctttga tcgtaataac - 841 ctgaccgttg atttttacgt caacgccggc aggaacaacg accggtgctt tagcaacacg - 901 agacattttt tcc - -// From 2c072f75eb63e2916766504e55ae533c38c52a78 Mon Sep 17 00:00:00 2001 From: LCrossman Date: Mon, 1 Jun 2026 13:33:46 +0100 Subject: [PATCH 09/16] checking packages in exclude --- microBioRust/Cargo.toml | 2 +- microBioRust/cleaned_dna.fasta | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) delete mode 100644 microBioRust/cleaned_dna.fasta diff --git a/microBioRust/Cargo.toml b/microBioRust/Cargo.toml index 0be129e..790c3b5 100644 --- a/microBioRust/Cargo.toml +++ b/microBioRust/Cargo.toml @@ -14,7 +14,7 @@ categories = [ ] readme = "README.md" authors = ["Lisa Crossman and microBioRust community"] -exclude = [".git", ".gitignore", ".dribble.example.embl", "tests/data/", "src/bin","config.toml","K12_ribo.gbk","Rhiz3841.gbk.gb","Rhiz3841.gbk.gb_out.faa","rust_via_python_countgbk2faa.py","rust_via_python_gbk2faa.py","asv.conf.json","benchmarks/"] +exclude = [".git", ".gitignore", ".dribble.example.embl", "tests/", "src/bin","config.toml","K12_ribo.gbk","Rhiz3841.gbk.gb","Rhiz3841.gbk.gb_out.faa","rust_via_python_countgbk2faa.py","rust_via_python_gbk2faa.py","asv.conf.json","benchmarks/"] repository = "https://github.com/microBioRust/microBioRust" documentation = "https://microbiorust.github.io/docs/" diff --git a/microBioRust/cleaned_dna.fasta b/microBioRust/cleaned_dna.fasta deleted file mode 100644 index 53dfabd..0000000 --- a/microBioRust/cleaned_dna.fasta +++ /dev/null @@ -1,6 +0,0 @@ ->seq1 -ATGC-ATGCATGCATGC ->seq2 -ATGCAATGCTTGCATGC ->seq3 -TTGCAATCCATGCAAGC From d94126b4131a466cd0d8d2b1a7aae495accac3f8 Mon Sep 17 00:00:00 2001 From: LCrossman Date: Mon, 1 Jun 2026 14:25:41 +0100 Subject: [PATCH 10/16] adding new microBioRust version to cargo.toml --- microbiorust-py/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/microbiorust-py/Cargo.toml b/microbiorust-py/Cargo.toml index e06d125..b277737 100644 --- a/microbiorust-py/Cargo.toml +++ b/microbiorust-py/Cargo.toml @@ -28,7 +28,7 @@ default = [] extension-module = ["pyo3/extension-module"] [dependencies] -microBioRust = { path = "../microBioRust" } +microBioRust = "0.1.4" microBioRust-seqmetrics = "0.1.3" pythonize = "0.26" pyo3-stub-gen = "0.17.2" From 316964c0bb2f4dae2d9e5421a569fc42a5518b2b Mon Sep 17 00:00:00 2001 From: LCrossman Date: Mon, 1 Jun 2026 15:02:22 +0100 Subject: [PATCH 11/16] improving gitignore --- microbiorust-py/.gitignore | 2 + microbiorust-py/config.toml.bak | 13 ++++ microbiorust-py/for_pip_publ.txt | 9 +++ .../microbiorust/_microbiorust.pyi | 70 +++++++++++++++++- microbiorust-py/pyproject.toml | 10 ++- .../test_mbr.cpython-313-pytest-9.0.2.pyc | Bin 62201 -> 62227 bytes 6 files changed, 98 insertions(+), 6 deletions(-) create mode 100644 microbiorust-py/config.toml.bak create mode 100644 microbiorust-py/for_pip_publ.txt diff --git a/microbiorust-py/.gitignore b/microbiorust-py/.gitignore index e8eea5f..428cb0b 100644 --- a/microbiorust-py/.gitignore +++ b/microbiorust-py/.gitignore @@ -1,6 +1,8 @@ # Rust target/ +*.dylib* +*.faa # Python __pycache__/ *.pyc diff --git a/microbiorust-py/config.toml.bak b/microbiorust-py/config.toml.bak new file mode 100644 index 0000000..2a3119b --- /dev/null +++ b/microbiorust-py/config.toml.bak @@ -0,0 +1,13 @@ +# .cargo/config.toml + +[target.aarch64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] + +[target.x86_64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] diff --git a/microbiorust-py/for_pip_publ.txt b/microbiorust-py/for_pip_publ.txt new file mode 100644 index 0000000..e416796 --- /dev/null +++ b/microbiorust-py/for_pip_publ.txt @@ -0,0 +1,9 @@ +And only add abi3 in your release CI/workflow: +maturin build --release --features abi3 + +So the full matrix is: +Who Command abi3 +Dev cloning from GitHub maturin develop ❌ no linking issues +You releasing to PyPI maturin build --release --features abi3 ✅ broad wheel +pip user pre-built wheel ✅ already baked in +cargo test cargo test ❌ tests work diff --git a/microbiorust-py/microbiorust/_microbiorust.pyi b/microbiorust-py/microbiorust/_microbiorust.pyi index e0e5498..20f8219 100644 --- a/microbiorust-py/microbiorust/_microbiorust.pyi +++ b/microbiorust-py/microbiorust/_microbiorust.pyi @@ -4,6 +4,21 @@ import builtins import typing +@typing.final +class FaaCollection: + def __len__(self) -> builtins.int: ... + def __contains__(self, tag: builtins.str) -> builtins.bool: ... + def __repr__(self) -> builtins.str: ... + def __getitem__(self, tag: builtins.str) -> PyFaaInfo: ... + def values(self) -> list: ... + def items(self) -> list: ... + def keys(self) -> list: ... + def __iter__(self) -> LocusTagIterator: ... + def to_json(self) -> builtins.str: + r""" + Serializes the collection to a formatted JSON string + """ + @typing.final class FeatureCollection: def __len__(self) -> builtins.int: ... @@ -14,12 +29,39 @@ class FeatureCollection: def items(self) -> list: ... def keys(self) -> list: ... def __iter__(self) -> LocusTagIterator: ... + def to_json(self) -> builtins.str: + r""" + Serializes the collection to a formatted JSON string + """ + +@typing.final +class FfnCollection: + def __len__(self) -> builtins.int: ... + def __contains__(self, tag: builtins.str) -> builtins.bool: ... + def __repr__(self) -> builtins.str: ... + def __getitem__(self, tag: builtins.str) -> PyFfnInfo: ... + def values(self) -> list: ... + def items(self) -> list: ... + def keys(self) -> list: ... + def __iter__(self) -> LocusTagIterator: ... + def to_json(self) -> builtins.str: + r""" + Serializes the collection to a formatted JSON string + """ @typing.final class LocusTagIterator: def __iter__(self) -> LocusTagIterator: ... def __next__(self) -> typing.Optional[builtins.str]: ... +@typing.final +class PyFaaInfo: + @property + def locus_tag(self) -> builtins.str: ... + @property + def faa(self) -> builtins.str: ... + def __repr__(self) -> builtins.str: ... + @typing.final class PyFeatureInfo: @property @@ -40,6 +82,14 @@ class PyFeatureInfo: def extras(self) -> builtins.list[builtins.str]: ... def __repr__(self) -> builtins.str: ... +@typing.final +class PyFfnInfo: + @property + def locus_tag(self) -> builtins.str: ... + @property + def ffn(self) -> builtins.str: ... + def __repr__(self) -> builtins.str: ... + @typing.final class PyRecord: def id(self) -> builtins.str: ... @@ -47,6 +97,8 @@ class PyRecord: def locus_tag(self) -> builtins.list[builtins.str]: ... def sequences(self) -> SequenceCollection: ... def features(self) -> FeatureCollection: ... + def faa(self) -> FaaCollection: ... + def ffn(self) -> FfnCollection: ... def __repr__(self) -> builtins.str: ... def __getitem__(self, tag: builtins.str) -> typing.Any: ... @@ -75,6 +127,12 @@ class RecordCollection: def write_faa(self, filename: builtins.str) -> tuple[builtins.str, builtins.int]: ... def write_ffn(self, filename: builtins.str) -> tuple[builtins.str, builtins.int]: ... def write_fna(self, filename: builtins.str) -> tuple[builtins.str, builtins.int]: ... + def values(self) -> list: ... + def items(self) -> list: ... + def to_json(self) -> builtins.str: + r""" + Serializes the RecordCollection to a formatted JSON string + """ @typing.final class SequenceCollection: @@ -86,24 +144,28 @@ class SequenceCollection: def items(self) -> list: ... def keys(self) -> list: ... def __iter__(self) -> LocusTagIterator: ... + def to_json(self) -> builtins.str: + r""" + Serializes the collection to a formatted JSON string + """ def amino_counts(seq: builtins.str) -> builtins.dict[builtins.str, builtins.int]: ... def amino_percentage(seq: builtins.str) -> builtins.dict[builtins.str, builtins.float]: ... -def embl_to_faa(filename: builtins.str) -> SequenceCollection: ... +def embl_to_faa(filename: builtins.str) -> FaaCollection: ... -def embl_to_ffn(filename: builtins.str) -> SequenceCollection: ... +def embl_to_ffn(filename: builtins.str) -> FfnCollection: ... def embl_to_fna(filename: builtins.str) -> RecordCollection: ... def embl_to_gff(filename: builtins.str, dna: builtins.bool) -> None: ... -def gbk_to_faa(filename: builtins.str) -> SequenceCollection: ... +def gbk_to_faa(filename: builtins.str) -> FaaCollection: ... def gbk_to_faa_count(filename: builtins.str) -> builtins.int: ... -def gbk_to_ffn(filename: builtins.str) -> SequenceCollection: ... +def gbk_to_ffn(filename: builtins.str) -> FfnCollection: ... def gbk_to_fna(filename: builtins.str) -> RecordCollection: ... diff --git a/microbiorust-py/pyproject.toml b/microbiorust-py/pyproject.toml index a83e338..3234e0a 100644 --- a/microbiorust-py/pyproject.toml +++ b/microbiorust-py/pyproject.toml @@ -12,17 +12,23 @@ authors = [ { name = "LCrossman" }, { name = "microBioRust Community" } ] +keywords = ["bioinformatics", "genomics", "microbial genomics", "bacteria", "genbank", "embl", "gff3", "microbiorust", "rust"] description = "Python bindings for microbiorust, Microbiology friendly bioinformatics functions" classifiers = [ "Programming Language :: Rust", - "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Intended Audience :: Science/Research", "Topic :: Scientific/Engineering :: Bio-Informatics", "License :: OSI Approved :: MIT License", - "Development Status :: 3 - Alpha", + "Development Status :: 4 - Beta", ] +license = {file = "LICENSE"} + [project.urls] Homepage = "https://microbiorust.github.io/docs/" Repository = "https://github.com/microBioRust/microBioRust" diff --git a/microbiorust-py/tests/__pycache__/test_mbr.cpython-313-pytest-9.0.2.pyc b/microbiorust-py/tests/__pycache__/test_mbr.cpython-313-pytest-9.0.2.pyc index e235002533d5e636c597827fc7523eea616f839c..dd6b293ab3b019e000345b4846feb523a2937e17 100644 GIT binary patch delta 73 zcmezQlzH+qX0Fe?yj%=G@H}rL*G68w82!-V)S_bjoXq0H Date: Mon, 1 Jun 2026 15:26:06 +0100 Subject: [PATCH 12/16] adding to exclude list --- microbiorust-py/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/microbiorust-py/pyproject.toml b/microbiorust-py/pyproject.toml index 3234e0a..3ca6fcd 100644 --- a/microbiorust-py/pyproject.toml +++ b/microbiorust-py/pyproject.toml @@ -44,7 +44,7 @@ include = [ "microbiorust/*.pyi", "microbiorust/py.typed" ] -exclude = ["src/bin/*", "benchmarks/*", "tests/*"] +exclude = ["src/bin/*", "*.embl", "*.gb", "*.gff", "*.faa", "asv.conf.json", "rust_via*.py", "bp_gbk2faa.py", "*.svg", "for_pip_publ.txt", "*.bak","benchmarks/*", "tests/*", ".github/*", "docs/*", ".gitignore"] [tool.ruff] line-length = 127 From ec4934d9e961a0b407e0b574cd15c2de7a49cbe1 Mon Sep 17 00:00:00 2001 From: LCrossman Date: Mon, 1 Jun 2026 15:32:46 +0100 Subject: [PATCH 13/16] adding to exclude for cargo --- microbiorust-py/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/microbiorust-py/Cargo.toml b/microbiorust-py/Cargo.toml index b277737..89eb4a1 100644 --- a/microbiorust-py/Cargo.toml +++ b/microbiorust-py/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT" keywords = ["bioinformatics","micro","bio","genomics","sequence-analysis"] description = "Python bindings for microBioRust Microbiology friendly bioinformatics Rust functions" categories = ["science::bioinformatics::sequence-analysis", "science::bioinformatics::genomics", "science::bioinformatics","science","data-structures"] -exclude = [".git",".gitignore","src/bin","config.toml","K12_ribo.gbk","Rhiz3841.gbk.gb","Rhiz3841.gbk.gb_out.faa","rust_via_python_countgbk2faa.py","rust_via_python_gbk2faa.py","asv.conf.json","benchmarks/"] +exclude = [".git",".gitignore","tests/", "rust_via*.py", "*embl", "*.gff", "*.faa", "src/bin","config.toml","K12_ribo.gbk","Rhiz3841.gbk.gb","Rhiz3841.gbk.gb_out.faa","rust_via_python_countgbk2faa.py","rust_via_python_gbk2faa.py","asv.conf.json","benchmarks/"] repository = "https://github.com/microBioRust/microBioRust" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html From d457666a30e37feaae932770e88779875dba2e1a Mon Sep 17 00:00:00 2001 From: LCrossman Date: Mon, 1 Jun 2026 15:34:23 +0100 Subject: [PATCH 14/16] adding to exclude for cargo --- microbiorust-py/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/microbiorust-py/Cargo.toml b/microbiorust-py/Cargo.toml index 89eb4a1..3e0af16 100644 --- a/microbiorust-py/Cargo.toml +++ b/microbiorust-py/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT" keywords = ["bioinformatics","micro","bio","genomics","sequence-analysis"] description = "Python bindings for microBioRust Microbiology friendly bioinformatics Rust functions" categories = ["science::bioinformatics::sequence-analysis", "science::bioinformatics::genomics", "science::bioinformatics","science","data-structures"] -exclude = [".git",".gitignore","tests/", "rust_via*.py", "*embl", "*.gff", "*.faa", "src/bin","config.toml","K12_ribo.gbk","Rhiz3841.gbk.gb","Rhiz3841.gbk.gb_out.faa","rust_via_python_countgbk2faa.py","rust_via_python_gbk2faa.py","asv.conf.json","benchmarks/"] +exclude = [".git",".gitignore","tests/", "rust_via*.py", "*.bak", "*.orig", "*.svg", "*embl", "*.gff", "*.faa", "src/bin","config.toml","K12_ribo.gbk","Rhiz3841.gbk.gb","Rhiz3841.gbk.gb_out.faa","rust_via_python_countgbk2faa.py","rust_via_python_gbk2faa.py","asv.conf.json","benchmarks/"] repository = "https://github.com/microBioRust/microBioRust" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html From fe197c92d69e17bc675391488898dce1511606e4 Mon Sep 17 00:00:00 2001 From: LCrossman Date: Mon, 1 Jun 2026 15:35:21 +0100 Subject: [PATCH 15/16] adding to exclude for cargo --- microbiorust-py/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/microbiorust-py/Cargo.toml b/microbiorust-py/Cargo.toml index 3e0af16..4a49583 100644 --- a/microbiorust-py/Cargo.toml +++ b/microbiorust-py/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT" keywords = ["bioinformatics","micro","bio","genomics","sequence-analysis"] description = "Python bindings for microBioRust Microbiology friendly bioinformatics Rust functions" categories = ["science::bioinformatics::sequence-analysis", "science::bioinformatics::genomics", "science::bioinformatics","science","data-structures"] -exclude = [".git",".gitignore","tests/", "rust_via*.py", "*.bak", "*.orig", "*.svg", "*embl", "*.gff", "*.faa", "src/bin","config.toml","K12_ribo.gbk","Rhiz3841.gbk.gb","Rhiz3841.gbk.gb_out.faa","rust_via_python_countgbk2faa.py","rust_via_python_gbk2faa.py","asv.conf.json","benchmarks/"] +exclude = [".git",".gitignore","tests/", "rust_via*.py", "*.bak", "bp_gbk2faa.py", "for_pip_publ.txt", "*.orig", "*.svg", "*embl", "*.gff", "*.faa", "src/bin","config.toml","K12_ribo.gbk","Rhiz3841.gbk.gb","Rhiz3841.gbk.gb_out.faa","rust_via_python_countgbk2faa.py","rust_via_python_gbk2faa.py","asv.conf.json","benchmarks/"] repository = "https://github.com/microBioRust/microBioRust" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html From 12af5df20c8de2af7995849f255fe76995d6ea80 Mon Sep 17 00:00:00 2001 From: LCrossman Date: Mon, 1 Jun 2026 15:51:10 +0100 Subject: [PATCH 16/16] adding LICENSE specifically for microbiorust-py --- microbiorust-py/LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 microbiorust-py/LICENSE diff --git a/microbiorust-py/LICENSE b/microbiorust-py/LICENSE new file mode 100644 index 0000000..a7d8ef9 --- /dev/null +++ b/microbiorust-py/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 LCrossman + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.