diff --git a/.gitignore b/.gitignore index d81714f..e57840d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,17 @@ # Generated by Cargo # will have compiled files and executables -debug/ target/ +dist/ +*.egg-info/ +__pycache__/ +*.pyc +debug/ + +*.so +*.abi3.so +*.dylib +*.pyd + expand.rs # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html @@ -25,3 +35,4 @@ env/ venv/ site/ static/ +*.gff diff --git a/microBioRust/Cargo.toml b/microBioRust/Cargo.toml index 6238591..790c3b5 100644 --- a/microBioRust/Cargo.toml +++ b/microBioRust/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "microBioRust" -version = "0.1.3" -edition = "2021" +version = "0.1.4" +edition = "2024" license = "MIT" keywords = ["bioinformatics", "micro", "bio", "genomics", "sequence-analysis"] description = "Microbiology friendly bioinformatics Rust functions" @@ -14,7 +14,7 @@ categories = [ ] readme = "README.md" authors = ["Lisa Crossman and microBioRust community"] -exclude = [".git", ".gitignore", ".dribble.example.embl", "tests/data/"] +exclude = [".git", ".gitignore", ".dribble.example.embl", "tests/", "src/bin","config.toml","K12_ribo.gbk","Rhiz3841.gbk.gb","Rhiz3841.gbk.gb_out.faa","rust_via_python_countgbk2faa.py","rust_via_python_gbk2faa.py","asv.conf.json","benchmarks/"] repository = "https://github.com/microBioRust/microBioRust" documentation = "https://microbiorust.github.io/docs/" @@ -27,7 +27,11 @@ path = "src/lib.rs" [[example]] name = "blast-example" -path = "examples/src/blast_parse.rs" +path = "examples/blast_parse.rs" + +[[example]] +name = "convert-to-faa" +path = "examples/convert_to_faa.rs" [dependencies] clap = { version = "4.5.19", features = ["derive"] } diff --git a/microBioRust/examples/src/blast_parse.rs b/microBioRust/examples/blast_parse.rs similarity index 69% rename from microBioRust/examples/src/blast_parse.rs rename to 
microBioRust/examples/blast_parse.rs index 9daded5..3b875a7 100644 --- a/microBioRust/examples/src/blast_parse.rs +++ b/microBioRust/examples/blast_parse.rs @@ -1,17 +1,15 @@ -use anyhow::{Context, Result}; -use async_compression::tokio::bufread::GzipDecoder as AsyncGzDecoder; +use anyhow::Result; use clap::Parser; -use quick_xml::events::Event; -use quick_xml::reader::Reader; -use quick_xml::escape::unescape; -use serde::Serialize; -use serde_json::ser::Serializer as JsonSerializer; use microBioRust::blast::*; -use std::io::Cursor; -use tokio::io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncWriteExt, BufReader}; +use tokio::io::AsyncWriteExt; #[derive(Parser, Debug)] -#[command(name = "blast-parsers", author, version, about = "async microBioRust BLAST parsers: for outfmt6 (single line tabular) and outfmt5 (xml)")] +#[command( + name = "blast-parsers", + author, + version, + about = "async microBioRust BLAST parsers: for outfmt6 (single line tabular) and outfmt5 (xml)" +)] struct Cli { ///Use .gz for gzip-compressed files. 
#[arg(short, long, default_value = "-")] @@ -44,7 +42,11 @@ async fn main() -> Result<()> { buf.push(b'\n'); tokio::io::stdout().write_all(&buf).await?; } else { - println!("query {:?} hits {}", iter_rec.query_def, iter_rec.hits.len()); + println!( + "query {:?} hits {}", + iter_rec.query_def, + iter_rec.hits.len() + ); } } Err(e) => eprintln!("xml parse error: {}", e), diff --git a/microBioRust/examples/convert_to_faa.rs b/microBioRust/examples/convert_to_faa.rs new file mode 100644 index 0000000..3d972a2 --- /dev/null +++ b/microBioRust/examples/convert_to_faa.rs @@ -0,0 +1,38 @@ + use clap::Parser; + use std::{ + fs::File, + io::{Write, BufWriter}, + }; + use microBioRust::{ + genbank, + }; + + #[derive(Parser, Debug)] + #[command(author, version, about)] + struct Arguments { + /// Path of the GenBank file to read. + #[arg(short, long)] + filename: String, + /// Path of the FASTA (.faa) file to write. + #[arg(short, long)] + output: String, + } + +/// Writes one FASTA entry, headed `>record_id|key`, for every CDS key that has a `.faa` sequence. +fn main() -> Result<(), anyhow::Error> { + let args = Arguments::parse(); + let records = genbank!(&args.filename); + let file = File::create(&args.output)?; + let mut writer = BufWriter::new(file); + for record in records { + for k in record.cds.attributes.keys() { + if let Some(seq) = record.seq_features.get_sequence_faa(k) { + writeln!(writer, ">{}|{}\n{}", record.id, k, seq)?; + } + } + } + // Flush explicitly: an error during BufWriter's implicit flush on drop would be lost. + writer.flush()?; + Ok(()) +} + diff --git a/microBioRust/src/embl.rs b/microBioRust/src/embl.rs index ed571f1..9b3d666 100644 --- a/microBioRust/src/embl.rs +++ b/microBioRust/src/embl.rs @@ -263,14 +263,14 @@ //!``` //!
-use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use bio::alphabets::dna::revcomp; use chrono::prelude::*; use lazy_static::lazy_static; use paste::paste; use protein_translate::translate; -use serde::Serialize; use regex::Regex; +use serde::Serialize; use std::{ collections::{BTreeMap, HashSet}, convert::{AsRef, TryInto}, @@ -600,8 +600,8 @@ where //println!("designated codon start {:?} {:?}", &codon_start, &locus_tag); } if self.line_buffer.contains("/gene=") { - let gen: Vec<&str> = self.line_buffer.split('\"').collect(); - gene = gen[1].to_string(); + let genes: Vec<&str> = self.line_buffer.split('\"').collect(); + gene = genes[1].to_string(); //println!("gene designated {:?} {:?}", &gene, &locus_tag); } if self.line_buffer.contains("/product") { @@ -1542,7 +1542,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_read_file() { - let content = std::fs::read_to_string("example.embl").expect("error reading file"); + let content = std::fs::read_to_string("tests/example.embl").expect("error reading file"); assert!(content.contains("ID")); assert!(content.len() > 0); } @@ -1553,7 +1553,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_embl() { - let file_embl = "example.embl"; + let file_embl = "tests/example.embl"; let records = embl!(&file_embl); assert!(records.len() > 0); } @@ -1564,7 +1564,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_source_attributes() { - let file_embl = "example.embl"; + let file_embl = "tests/example.embl"; let records = embl!(&file_embl); if let Some(record) = records.first() { if let Some((key, val)) = record.source_map.source_attributes.first_key_value() { @@ -1579,7 +1579,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_cds_attributes() { - let file_embl = "example.embl"; + let file_embl = "tests/example.embl"; let records = embl!(&file_embl); if let Some(record) = records.first() { if 
let Some((locus_tag, vals)) = record.cds.attributes.first_key_value() { @@ -1598,7 +1598,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_sequence_attributes() { - let file_embl = "example.embl"; + let file_embl = "tests/example.embl"; let records = embl!(&file_embl); if let Some(record) = records.first() { if let Some((key, vals)) = record.cds.attributes.first_key_value() { diff --git a/microBioRust/src/gbk.rs b/microBioRust/src/gbk.rs index e20b22c..186f427 100644 --- a/microBioRust/src/gbk.rs +++ b/microBioRust/src/gbk.rs @@ -126,7 +126,7 @@ //! let mut read_counter: u32 = 0; //! let mut seq_region: BTreeMap = BTreeMap::new(); //! let mut record_vec: Vec = Vec::new(); -//! loop { +//! loop { //! match records.next() { //! Some(Ok(mut record)) => { //! println!("next record"); @@ -275,7 +275,7 @@ //!``` //! -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use bio::alphabets::dna::revcomp; use chrono::prelude::*; use itertools::Itertools; @@ -681,8 +681,8 @@ where //println!("designated codon start {:?} {:?}", &codon_start, &locus_tag); } if self.line_buffer.contains("/gene=") { - let gen: Vec<&str> = self.line_buffer.split('\"').collect(); - gene = gen[1].to_string(); + let genes: Vec<&str> = self.line_buffer.split('\"').collect(); + gene = genes[1].to_string(); //println!("gene designated {:?} {:?}", &gene, &locus_tag); } if self.line_buffer.contains("/product") { @@ -1732,7 +1732,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_read_file() { - let content = std::fs::read_to_string("K12_ribo.gbk").expect("error reading file"); + let content = std::fs::read_to_string("tests/K12_ribo.gbk").expect("error reading file"); assert!(content.contains("LOCUS")); assert!(content.len() > 0); } @@ -1743,7 +1743,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_gbk() { - let file_gbk = "K12_ribo.gbk"; + let file_gbk = "tests/K12_ribo.gbk"; let records = 
genbank!(&file_gbk); assert!(records.len() > 0); } @@ -1754,7 +1754,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_source_attributes() { - let file_gbk = "K12_ribo.gbk"; + let file_gbk = "tests/K12_ribo.gbk"; let records = genbank!(&file_gbk); if let Some(record) = records.first() { if let Some((key, val)) = record.source_map.source_attributes.first_key_value() { @@ -1769,7 +1769,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_cds_attributes() { - let file_gbk = "K12_ribo.gbk"; + let file_gbk = "tests/K12_ribo.gbk"; let records = genbank!(&file_gbk); if let Some(record) = records.first() { if let Some((locus_tag, vals)) = record.cds.attributes.first_key_value() { @@ -1788,7 +1788,7 @@ mod tests { #[allow(unused_assignments)] #[allow(unused_imports)] fn test_parse_sequence_attributes() { - let file_gbk = "K12_ribo.gbk"; + let file_gbk = "tests/K12_ribo.gbk"; let records = genbank!(&file_gbk); if let Some(record) = records.first() { if let Some((key, vals)) = record.cds.attributes.first_key_value() { diff --git a/microBioRust/K12_ribo.gbk b/microBioRust/tests/K12_ribo.gbk similarity index 100% rename from microBioRust/K12_ribo.gbk rename to microBioRust/tests/K12_ribo.gbk diff --git a/microBioRust/Rhiz3841.embl b/microBioRust/tests/Rhiz3841.embl similarity index 100% rename from microBioRust/Rhiz3841.embl rename to microBioRust/tests/Rhiz3841.embl diff --git a/microBioRust/Rhiz3841.gbk.gb b/microBioRust/tests/Rhiz3841.gbk.gb similarity index 100% rename from microBioRust/Rhiz3841.gbk.gb rename to microBioRust/tests/Rhiz3841.gbk.gb diff --git a/microBioRust/tests/cleaned_dna.fasta b/microBioRust/tests/cleaned_dna.fasta new file mode 100644 index 0000000..53dfabd --- /dev/null +++ b/microBioRust/tests/cleaned_dna.fasta @@ -0,0 +1,6 @@ +>seq1 +ATGC-ATGCATGCATGC +>seq2 +ATGCAATGCTTGCATGC +>seq3 +TTGCAATCCATGCAAGC diff --git a/microBioRust/tests/embl_to_faa.rs b/microBioRust/tests/embl_to_faa.rs 
index 86933c7..e44fe32 100644 --- a/microBioRust/tests/embl_to_faa.rs +++ b/microBioRust/tests/embl_to_faa.rs @@ -2,7 +2,7 @@ use microBioRust::embl::Reader; use std::fs; #[test] fn embl_to_faa() -> Result<(), anyhow::Error> { - let file_embl = fs::File::open("example.embl")?; + let file_embl = fs::File::open("tests/example.embl")?; let reader = Reader::new(file_embl); let mut records = reader.records(); let mut read_counter: u32 = 0; diff --git a/microBioRust/tests/embl_to_ffn.rs b/microBioRust/tests/embl_to_ffn.rs index 70a6f7a..7486ca3 100644 --- a/microBioRust/tests/embl_to_ffn.rs +++ b/microBioRust/tests/embl_to_ffn.rs @@ -2,7 +2,7 @@ use microBioRust::embl::Reader; use std::fs; #[test] pub fn embl_to_ffn() -> Result<(), anyhow::Error> { - let file_embl = fs::File::open("example.embl")?; + let file_embl = fs::File::open("tests/example.embl")?; let reader = Reader::new(file_embl); let mut records = reader.records(); let mut read_counter: u32 = 0; diff --git a/microBioRust/tests/embl_to_gff.rs b/microBioRust/tests/embl_to_gff.rs index 099cf6a..d385f8a 100644 --- a/microBioRust/tests/embl_to_gff.rs +++ b/microBioRust/tests/embl_to_gff.rs @@ -1,10 +1,10 @@ -use microBioRust::embl::{gff_write, Reader, Record}; +use microBioRust::embl::{Reader, Record, gff_write}; use std::collections::BTreeMap; use std::fs; #[test] fn test_embl_to_gff() -> std::io::Result<()> { - let file_embl = fs::File::open("example.embl")?; + let file_embl = fs::File::open("tests/example.embl")?; let reader = Reader::new(file_embl); let mut records = reader.records(); let mut read_counter: u32 = 0; diff --git a/microBioRust/example.embl b/microBioRust/tests/example.embl similarity index 100% rename from microBioRust/example.embl rename to microBioRust/tests/example.embl diff --git a/microBioRust/tests/genbank_to_faa.rs b/microBioRust/tests/genbank_to_faa.rs index 2dbee2e..5eb8197 100644 --- a/microBioRust/tests/genbank_to_faa.rs +++ b/microBioRust/tests/genbank_to_faa.rs @@ -2,7 +2,7 @@ use 
microBioRust::gbk::Reader; use std::fs; #[test] pub fn genbank_to_faa() -> Result<(), anyhow::Error> { - let file_gbk = fs::File::open("K12_ribo.gbk")?; + let file_gbk = fs::File::open("tests/K12_ribo.gbk")?; let reader = Reader::new(file_gbk); let mut records = reader.records(); let mut read_counter: u32 = 0; diff --git a/microBioRust/tests/genbank_to_ffn.rs b/microBioRust/tests/genbank_to_ffn.rs index 7ba93aa..3a285d2 100644 --- a/microBioRust/tests/genbank_to_ffn.rs +++ b/microBioRust/tests/genbank_to_ffn.rs @@ -2,7 +2,7 @@ use microBioRust::gbk::Reader; use std::fs; #[test] pub fn genbank_to_ffn() -> Result<(), anyhow::Error> { - let file_gbk = fs::File::open("K12_ribo.gbk")?; + let file_gbk = fs::File::open("tests/K12_ribo.gbk")?; let reader = Reader::new(file_gbk); let mut records = reader.records(); let mut read_counter: u32 = 0; diff --git a/microBioRust/tests/genbank_to_gff.rs b/microBioRust/tests/genbank_to_gff.rs index 9274ff9..5f860bb 100644 --- a/microBioRust/tests/genbank_to_gff.rs +++ b/microBioRust/tests/genbank_to_gff.rs @@ -1,10 +1,10 @@ -use microBioRust::gbk::{gff_write, Reader, Record}; +use microBioRust::gbk::{Reader, Record, gff_write}; use std::collections::BTreeMap; use std::fs; use std::io; #[test] pub fn genbank_to_gff() -> io::Result<()> { - let file_gbk = fs::File::open("K12_ribo.gbk")?; + let file_gbk = fs::File::open("tests/K12_ribo.gbk")?; let _prev_start: u32 = 0; let mut prev_end: u32 = 0; let reader = Reader::new(file_gbk); diff --git a/microBioRust/tests/new_output_embl.gbk b/microBioRust/tests/new_output_embl.gbk new file mode 100644 index 0000000..0a27db7 --- /dev/null +++ b/microBioRust/tests/new_output_embl.gbk @@ -0,0 +1,52 @@ +LOCUS source_1 928 bp DNA linear CON 01-JUN-2026 +DEFINITION Escherichia coli K-12 substr. MG1655. +ACCESSION source_1 +KEYWORDS . +SOURCE Escherichia coli K-12 substr. MG1655 + ORGANISM Escherichia coli K-12 substr. MG1655 +FEATURES Location/Qualifiers + source 1..910 + /organism="K-12 substr. 
MG1655" + /mol_type="DNA" + /strain="K-12 substr. MG1655" + /db_xref="PRJNA57779" + gene complement(1..354) + /locus_tag="b3304" + CDS complement(1..354) + /locus_tag="b3304" + /codon_start="1" + /gene="rplR" + /translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGS + LVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQAL + DAAREAGLQ" + /product="50S ribosomal subunit protein L18" + gene complement(364..897) + /locus_tag="b3305" + CDS complement(364..897) + /locus_tag="b3305" + /codon_start="1" + /gene="rplF" + /translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKH + NTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLS + GFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYAD + VVRTKEAKK" + /product="50S ribosomal subunit protein L6" +ORIGIN + 1 acctctacct tagaactgaa ggccagcttc acgggcagca tctgccagtg cctggacacg + 61 accatgatat tggaacccgg aacggtcaaa ggatacatct ttgatgcctt tttccagagc + 121 gcgttcagcg acagctttac ccacagctgc agccgcgtct ttgttaccgg tgtacttcag + 181 ttgttcagcg atagcttttt ctacagtaga agcagctacc agaacttcag aaccgttcgg + 241 tgcaattacc tgtgcgtaaa tgtgacgcgg ggtacgatgt accaccaggc gagttgcgcc + 301 cagctcctgg agcttgcggc gtgcgcgggt cgcacgacgg atacgagcag atttcttatc + 361 catagtgtta ccttacttct tcttagcctc tttggtacgc acgacttcgt cggcgtaacg + 421 aacacccttg cctttataag gctcaggacg acggtaggcg cgcagatccg ctgcaacctg + 481 gccgatcacc tgcttatcag cgcctttcag cacgatttca gtctgagtcg gacattcagc + 541 agtgataccc gcaggcagct gatggtcaac aggatgagag aaacccagag acaggttaat + 601 cacattgcct ttaaccgctg cacggtaacc tacaccaacc agctgcagct tcttagtgaa + 661 gccttcggta acaccgataa ccattgagtt cagcagggca cgcgcggtac cagcctgtgc + 721 ccaaccgtct gcgtaaccat cacgcggacc gaaggtcagg gtattatctg catgtttaac + 781 ttcaacagca tcgttgagag tacgagtcag ctcgccgttt ttacctttga tcgtaataac + 841 ctgaccgttg atttttacgt caacgccggc aggaacaacg accggtgctt tagcaacacg + 901 agacattttt tcc + +// diff --git a/microbiorust-py/.gitignore b/microbiorust-py/.gitignore index e8eea5f..428cb0b 100644 --- 
a/microbiorust-py/.gitignore +++ b/microbiorust-py/.gitignore @@ -1,6 +1,8 @@ # Rust target/ +*.dylib* +*.faa # Python __pycache__/ *.pyc diff --git a/microbiorust-py/Cargo.toml b/microbiorust-py/Cargo.toml index e06d125..4a49583 100644 --- a/microbiorust-py/Cargo.toml +++ b/microbiorust-py/Cargo.toml @@ -8,7 +8,7 @@ license = "MIT" keywords = ["bioinformatics","micro","bio","genomics","sequence-analysis"] description = "Python bindings for microBioRust Microbiology friendly bioinformatics Rust functions" categories = ["science::bioinformatics::sequence-analysis", "science::bioinformatics::genomics", "science::bioinformatics","science","data-structures"] -exclude = [".git",".gitignore","src/bin","config.toml","K12_ribo.gbk","Rhiz3841.gbk.gb","Rhiz3841.gbk.gb_out.faa","rust_via_python_countgbk2faa.py","rust_via_python_gbk2faa.py","asv.conf.json","benchmarks/"] +exclude = [".git",".gitignore","tests/", "rust_via*.py", "*.bak", "bp_gbk2faa.py", "for_pip_publ.txt", "*.orig", "*.svg", "*embl", "*.gff", "*.faa", "src/bin","config.toml","K12_ribo.gbk","Rhiz3841.gbk.gb","Rhiz3841.gbk.gb_out.faa","rust_via_python_countgbk2faa.py","rust_via_python_gbk2faa.py","asv.conf.json","benchmarks/"] repository = "https://github.com/microBioRust/microBioRust" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -28,7 +28,7 @@ default = [] extension-module = ["pyo3/extension-module"] [dependencies] -microBioRust = { path = "../microBioRust" } +microBioRust = "0.1.4" microBioRust-seqmetrics = "0.1.3" pythonize = "0.26" pyo3-stub-gen = "0.17.2" diff --git a/microbiorust-py/LICENSE b/microbiorust-py/LICENSE new file mode 100644 index 0000000..a7d8ef9 --- /dev/null +++ b/microbiorust-py/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 LCrossman + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without 
restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/microbiorust-py/README.md b/microbiorust-py/README.md index 3ffe4c9..6f7af02 100644 --- a/microbiorust-py/README.md +++ b/microbiorust-py/README.md @@ -12,6 +12,11 @@ pip install microbiorust ``` +To run the Python tests with pytest (from a repository checkout; the tests are not included in the published package): +```bash +python3 -m pytest -s tests/test_mbr.py +``` + Wheels are available for Linux, macOS and Windows (Python 3.10+). No Rust toolchain required.
(no requirement to install Rust) @@ -32,6 +37,7 @@ To verify the Python module functions are correctly exposed from Rust: cargo test ``` + --- ## Features diff --git a/microbiorust-py/config.toml.bak b/microbiorust-py/config.toml.bak new file mode 100644 index 0000000..2a3119b --- /dev/null +++ b/microbiorust-py/config.toml.bak @@ -0,0 +1,13 @@ +# .cargo/config.toml + +[target.aarch64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] + +[target.x86_64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] diff --git a/microbiorust-py/for_pip_publ.txt b/microbiorust-py/for_pip_publ.txt new file mode 100644 index 0000000..e416796 --- /dev/null +++ b/microbiorust-py/for_pip_publ.txt @@ -0,0 +1,9 @@ +And only add abi3 in your release CI/workflow: +maturin build --release --features abi3 + +So the full matrix is: +Who Command abi3 +Dev cloning from GitHub maturin develop ❌ no linking issues +You releasing to PyPI maturin build --release --features abi3 ✅ broad wheel +pip user pre-built wheel ✅ already baked in +cargo test cargo test ❌ tests work diff --git a/microbiorust-py/microbiorust/_microbiorust.pyi b/microbiorust-py/microbiorust/_microbiorust.pyi index e0e5498..20f8219 100644 --- a/microbiorust-py/microbiorust/_microbiorust.pyi +++ b/microbiorust-py/microbiorust/_microbiorust.pyi @@ -4,6 +4,21 @@ import builtins import typing +@typing.final +class FaaCollection: + def __len__(self) -> builtins.int: ... + def __contains__(self, tag: builtins.str) -> builtins.bool: ... + def __repr__(self) -> builtins.str: ... + def __getitem__(self, tag: builtins.str) -> PyFaaInfo: ... + def values(self) -> list: ... + def items(self) -> list: ... + def keys(self) -> list: ... + def __iter__(self) -> LocusTagIterator: ... 
+ def to_json(self) -> builtins.str: + r""" + Serializes the collection to a formatted JSON string + """ + @typing.final class FeatureCollection: def __len__(self) -> builtins.int: ... @@ -14,12 +29,39 @@ class FeatureCollection: def items(self) -> list: ... def keys(self) -> list: ... def __iter__(self) -> LocusTagIterator: ... + def to_json(self) -> builtins.str: + r""" + Serializes the collection to a formatted JSON string + """ + +@typing.final +class FfnCollection: + def __len__(self) -> builtins.int: ... + def __contains__(self, tag: builtins.str) -> builtins.bool: ... + def __repr__(self) -> builtins.str: ... + def __getitem__(self, tag: builtins.str) -> PyFfnInfo: ... + def values(self) -> list: ... + def items(self) -> list: ... + def keys(self) -> list: ... + def __iter__(self) -> LocusTagIterator: ... + def to_json(self) -> builtins.str: + r""" + Serializes the collection to a formatted JSON string + """ @typing.final class LocusTagIterator: def __iter__(self) -> LocusTagIterator: ... def __next__(self) -> typing.Optional[builtins.str]: ... +@typing.final +class PyFaaInfo: + @property + def locus_tag(self) -> builtins.str: ... + @property + def faa(self) -> builtins.str: ... + def __repr__(self) -> builtins.str: ... + @typing.final class PyFeatureInfo: @property @@ -40,6 +82,14 @@ class PyFeatureInfo: def extras(self) -> builtins.list[builtins.str]: ... def __repr__(self) -> builtins.str: ... +@typing.final +class PyFfnInfo: + @property + def locus_tag(self) -> builtins.str: ... + @property + def ffn(self) -> builtins.str: ... + def __repr__(self) -> builtins.str: ... + @typing.final class PyRecord: def id(self) -> builtins.str: ... @@ -47,6 +97,8 @@ class PyRecord: def locus_tag(self) -> builtins.list[builtins.str]: ... def sequences(self) -> SequenceCollection: ... def features(self) -> FeatureCollection: ... + def faa(self) -> FaaCollection: ... + def ffn(self) -> FfnCollection: ... def __repr__(self) -> builtins.str: ... 
def __getitem__(self, tag: builtins.str) -> typing.Any: ... @@ -75,6 +127,12 @@ class RecordCollection: def write_faa(self, filename: builtins.str) -> tuple[builtins.str, builtins.int]: ... def write_ffn(self, filename: builtins.str) -> tuple[builtins.str, builtins.int]: ... def write_fna(self, filename: builtins.str) -> tuple[builtins.str, builtins.int]: ... + def values(self) -> list: ... + def items(self) -> list: ... + def to_json(self) -> builtins.str: + r""" + Serializes the RecordCollection to a formatted JSON string + """ @typing.final class SequenceCollection: @@ -86,24 +144,28 @@ class SequenceCollection: def items(self) -> list: ... def keys(self) -> list: ... def __iter__(self) -> LocusTagIterator: ... + def to_json(self) -> builtins.str: + r""" + Serializes the collection to a formatted JSON string + """ def amino_counts(seq: builtins.str) -> builtins.dict[builtins.str, builtins.int]: ... def amino_percentage(seq: builtins.str) -> builtins.dict[builtins.str, builtins.float]: ... -def embl_to_faa(filename: builtins.str) -> SequenceCollection: ... +def embl_to_faa(filename: builtins.str) -> FaaCollection: ... -def embl_to_ffn(filename: builtins.str) -> SequenceCollection: ... +def embl_to_ffn(filename: builtins.str) -> FfnCollection: ... def embl_to_fna(filename: builtins.str) -> RecordCollection: ... def embl_to_gff(filename: builtins.str, dna: builtins.bool) -> None: ... -def gbk_to_faa(filename: builtins.str) -> SequenceCollection: ... +def gbk_to_faa(filename: builtins.str) -> FaaCollection: ... def gbk_to_faa_count(filename: builtins.str) -> builtins.int: ... -def gbk_to_ffn(filename: builtins.str) -> SequenceCollection: ... +def gbk_to_ffn(filename: builtins.str) -> FfnCollection: ... def gbk_to_fna(filename: builtins.str) -> RecordCollection: ... 
diff --git a/microbiorust-py/pyproject.toml b/microbiorust-py/pyproject.toml index a83e338..3ca6fcd 100644 --- a/microbiorust-py/pyproject.toml +++ b/microbiorust-py/pyproject.toml @@ -12,17 +12,23 @@ authors = [ { name = "LCrossman" }, { name = "microBioRust Community" } ] +keywords = ["bioinformatics", "genomics", "microbial genomics", "bacteria", "genbank", "embl", "gff3", "microbiorust", "rust"] description = "Python bindings for microbiorust, Microbiology friendly bioinformatics functions" classifiers = [ "Programming Language :: Rust", - "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Intended Audience :: Science/Research", "Topic :: Scientific/Engineering :: Bio-Informatics", "License :: OSI Approved :: MIT License", - "Development Status :: 3 - Alpha", + "Development Status :: 4 - Beta", ] +license = {file = "LICENSE"} + [project.urls] Homepage = "https://microbiorust.github.io/docs/" Repository = "https://github.com/microBioRust/microBioRust" @@ -38,7 +44,7 @@ include = [ "microbiorust/*.pyi", "microbiorust/py.typed" ] -exclude = ["src/bin/*", "benchmarks/*", "tests/*"] +exclude = ["src/bin/*", "*.embl", "*.gb", "*.gff", "*.faa", "asv.conf.json", "rust_via*.py", "bp_gbk2faa.py", "*.svg", "for_pip_publ.txt", "*.bak","benchmarks/*", "tests/*", ".github/*", "docs/*", ".gitignore"] [tool.ruff] line-length = 127 diff --git a/microbiorust-py/tests/__pycache__/test_mbr.cpython-313-pytest-9.0.2.pyc b/microbiorust-py/tests/__pycache__/test_mbr.cpython-313-pytest-9.0.2.pyc index e235002..dd6b293 100644 Binary files a/microbiorust-py/tests/__pycache__/test_mbr.cpython-313-pytest-9.0.2.pyc and b/microbiorust-py/tests/__pycache__/test_mbr.cpython-313-pytest-9.0.2.pyc differ