Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
# Generated by Cargo
# will have compiled files and executables
debug/
target/
dist/
*.egg-info/
__pycache__/
*.pyc
debug/

*.so
*.abi3.so
*.dylib
*.pyd

expand.rs
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Expand All @@ -25,3 +35,4 @@ env/
venv/
site/
static/
*.gff
12 changes: 8 additions & 4 deletions microBioRust/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "microBioRust"
version = "0.1.3"
edition = "2021"
version = "0.1.4"
edition = "2024"
license = "MIT"
keywords = ["bioinformatics", "micro", "bio", "genomics", "sequence-analysis"]
description = "Microbiology friendly bioinformatics Rust functions"
Expand All @@ -14,7 +14,7 @@ categories = [
]
readme = "README.md"
authors = ["Lisa Crossman and microBioRust community"]
exclude = [".git", ".gitignore", ".dribble.example.embl", "tests/data/"]
exclude = [".git", ".gitignore", ".dribble.example.embl", "tests/", "src/bin","config.toml","K12_ribo.gbk","Rhiz3841.gbk.gb","Rhiz3841.gbk.gb_out.faa","rust_via_python_countgbk2faa.py","rust_via_python_gbk2faa.py","asv.conf.json","benchmarks/"]
repository = "https://github.com/microBioRust/microBioRust"
documentation = "https://microbiorust.github.io/docs/"

Expand All @@ -27,7 +27,11 @@ path = "src/lib.rs"

[[example]]
name = "blast-example"
path = "examples/src/blast_parse.rs"
path = "examples/blast_parse.rs"

[[example]]
name = "convert-to-faa"
path = "examples/convert_to_faa.rs"

[dependencies]
clap = { version = "4.5.19", features = ["derive"] }
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
use anyhow::{Context, Result};
use async_compression::tokio::bufread::GzipDecoder as AsyncGzDecoder;
use anyhow::Result;
use clap::Parser;
use quick_xml::events::Event;
use quick_xml::reader::Reader;
use quick_xml::escape::unescape;
use serde::Serialize;
use serde_json::ser::Serializer as JsonSerializer;
use microBioRust::blast::*;
use std::io::Cursor;
use tokio::io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncWriteExt, BufReader};
use tokio::io::AsyncWriteExt;

#[derive(Parser, Debug)]
#[command(name = "blast-parsers", author, version, about = "async microBioRust BLAST parsers: for outfmt6 (single line tabular) and outfmt5 (xml)")]
#[command(
name = "blast-parsers",
author,
version,
about = "async microBioRust BLAST parsers: for outfmt6 (single line tabular) and outfmt5 (xml)"
)]
struct Cli {
///Use .gz for gzip-compressed files.
#[arg(short, long, default_value = "-")]
Expand Down Expand Up @@ -44,7 +42,11 @@ async fn main() -> Result<()> {
buf.push(b'\n');
tokio::io::stdout().write_all(&buf).await?;
} else {
println!("query {:?} hits {}", iter_rec.query_def, iter_rec.hits.len());
println!(
"query {:?} hits {}",
iter_rec.query_def,
iter_rec.hits.len()
);
}
}
Err(e) => eprintln!("xml parse error: {}", e),
Expand Down
34 changes: 34 additions & 0 deletions microBioRust/examples/convert_to_faa.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
use clap::Parser;
use std::{
fs::File,
io::{Write, BufWriter},
};
use microBioRust::{
genbank,
};

#[derive(Parser, Debug)]
#[clap(author, version, about)]
struct Arguments {
#[clap(short, long)]
filename: String,
#[clap(short, long)]
output: String,
}

fn main() -> Result<(), anyhow::Error> {
let args = Arguments::parse();
let records = genbank!(&args.filename);
let file = File::create(&args.output)?;
let mut writer = BufWriter::new(file);
for record in records {
for (k, _v) in &record.cds.attributes {
if let Some(seq) = record.seq_features.get_sequence_faa(k) {
writeln!(writer, ">{}|{}\n{}", &record.id, &k, seq)?;
}
}
}
writer.flush()?;
Ok(())
}

18 changes: 9 additions & 9 deletions microBioRust/src/embl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -263,14 +263,14 @@
//!```
//!

use anyhow::{anyhow, Context};
use anyhow::{Context, anyhow};
use bio::alphabets::dna::revcomp;
use chrono::prelude::*;
use lazy_static::lazy_static;
use paste::paste;
use protein_translate::translate;
use serde::Serialize;
use regex::Regex;
use serde::Serialize;
use std::{
collections::{BTreeMap, HashSet},
convert::{AsRef, TryInto},
Expand Down Expand Up @@ -600,8 +600,8 @@ where
//println!("designated codon start {:?} {:?}", &codon_start, &locus_tag);
}
if self.line_buffer.contains("/gene=") {
let gen: Vec<&str> = self.line_buffer.split('\"').collect();
gene = gen[1].to_string();
let genes: Vec<&str> = self.line_buffer.split('\"').collect();
gene = genes[1].to_string();
//println!("gene designated {:?} {:?}", &gene, &locus_tag);
}
if self.line_buffer.contains("/product") {
Expand Down Expand Up @@ -1542,7 +1542,7 @@ mod tests {
#[allow(unused_assignments)]
#[allow(unused_imports)]
fn test_read_file() {
let content = std::fs::read_to_string("example.embl").expect("error reading file");
let content = std::fs::read_to_string("tests/example.embl").expect("error reading file");
assert!(content.contains("ID"));
assert!(content.len() > 0);
}
Expand All @@ -1553,7 +1553,7 @@ mod tests {
#[allow(unused_assignments)]
#[allow(unused_imports)]
fn test_parse_embl() {
let file_embl = "example.embl";
let file_embl = "tests/example.embl";
let records = embl!(&file_embl);
assert!(records.len() > 0);
}
Expand All @@ -1564,7 +1564,7 @@ mod tests {
#[allow(unused_assignments)]
#[allow(unused_imports)]
fn test_parse_source_attributes() {
let file_embl = "example.embl";
let file_embl = "tests/example.embl";
let records = embl!(&file_embl);
if let Some(record) = records.first() {
if let Some((key, val)) = record.source_map.source_attributes.first_key_value() {
Expand All @@ -1579,7 +1579,7 @@ mod tests {
#[allow(unused_assignments)]
#[allow(unused_imports)]
fn test_parse_cds_attributes() {
let file_embl = "example.embl";
let file_embl = "tests/example.embl";
let records = embl!(&file_embl);
if let Some(record) = records.first() {
if let Some((locus_tag, vals)) = record.cds.attributes.first_key_value() {
Expand All @@ -1598,7 +1598,7 @@ mod tests {
#[allow(unused_assignments)]
#[allow(unused_imports)]
fn test_parse_sequence_attributes() {
let file_embl = "example.embl";
let file_embl = "tests/example.embl";
let records = embl!(&file_embl);
if let Some(record) = records.first() {
if let Some((key, vals)) = record.cds.attributes.first_key_value() {
Expand Down
18 changes: 9 additions & 9 deletions microBioRust/src/gbk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@
//! let mut read_counter: u32 = 0;
//! let mut seq_region: BTreeMap<String, (u32,u32)> = BTreeMap::new();
//! let mut record_vec: Vec<Record> = Vec::new();
//! loop {
//! loop {
//! match records.next() {
//! Some(Ok(mut record)) => {
//! println!("next record");
Expand Down Expand Up @@ -275,7 +275,7 @@
//!```
//!

use anyhow::{anyhow, Context};
use anyhow::{Context, anyhow};
use bio::alphabets::dna::revcomp;
use chrono::prelude::*;
use itertools::Itertools;
Expand Down Expand Up @@ -681,8 +681,8 @@ where
//println!("designated codon start {:?} {:?}", &codon_start, &locus_tag);
}
if self.line_buffer.contains("/gene=") {
let gen: Vec<&str> = self.line_buffer.split('\"').collect();
gene = gen[1].to_string();
let genes: Vec<&str> = self.line_buffer.split('\"').collect();
gene = genes[1].to_string();
//println!("gene designated {:?} {:?}", &gene, &locus_tag);
}
if self.line_buffer.contains("/product") {
Expand Down Expand Up @@ -1732,7 +1732,7 @@ mod tests {
#[allow(unused_assignments)]
#[allow(unused_imports)]
fn test_read_file() {
let content = std::fs::read_to_string("K12_ribo.gbk").expect("error reading file");
let content = std::fs::read_to_string("tests/K12_ribo.gbk").expect("error reading file");
assert!(content.contains("LOCUS"));
assert!(content.len() > 0);
}
Expand All @@ -1743,7 +1743,7 @@ mod tests {
#[allow(unused_assignments)]
#[allow(unused_imports)]
fn test_parse_gbk() {
let file_gbk = "K12_ribo.gbk";
let file_gbk = "tests/K12_ribo.gbk";
let records = genbank!(&file_gbk);
assert!(records.len() > 0);
}
Expand All @@ -1754,7 +1754,7 @@ mod tests {
#[allow(unused_assignments)]
#[allow(unused_imports)]
fn test_parse_source_attributes() {
let file_gbk = "K12_ribo.gbk";
let file_gbk = "tests/K12_ribo.gbk";
let records = genbank!(&file_gbk);
if let Some(record) = records.first() {
if let Some((key, val)) = record.source_map.source_attributes.first_key_value() {
Expand All @@ -1769,7 +1769,7 @@ mod tests {
#[allow(unused_assignments)]
#[allow(unused_imports)]
fn test_parse_cds_attributes() {
let file_gbk = "K12_ribo.gbk";
let file_gbk = "tests/K12_ribo.gbk";
let records = genbank!(&file_gbk);
if let Some(record) = records.first() {
if let Some((locus_tag, vals)) = record.cds.attributes.first_key_value() {
Expand All @@ -1788,7 +1788,7 @@ mod tests {
#[allow(unused_assignments)]
#[allow(unused_imports)]
fn test_parse_sequence_attributes() {
let file_gbk = "K12_ribo.gbk";
let file_gbk = "tests/K12_ribo.gbk";
let records = genbank!(&file_gbk);
if let Some(record) = records.first() {
if let Some((key, vals)) = record.cds.attributes.first_key_value() {
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
6 changes: 6 additions & 0 deletions microBioRust/tests/cleaned_dna.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
>seq1
ATGC-ATGCATGCATGC
>seq2
ATGCAATGCTTGCATGC
>seq3
TTGCAATCCATGCAAGC
2 changes: 1 addition & 1 deletion microBioRust/tests/embl_to_faa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use microBioRust::embl::Reader;
use std::fs;
#[test]
fn embl_to_faa() -> Result<(), anyhow::Error> {
let file_embl = fs::File::open("example.embl")?;
let file_embl = fs::File::open("tests/example.embl")?;
let reader = Reader::new(file_embl);
let mut records = reader.records();
let mut read_counter: u32 = 0;
Expand Down
2 changes: 1 addition & 1 deletion microBioRust/tests/embl_to_ffn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use microBioRust::embl::Reader;
use std::fs;
#[test]
pub fn embl_to_ffn() -> Result<(), anyhow::Error> {
let file_embl = fs::File::open("example.embl")?;
let file_embl = fs::File::open("tests/example.embl")?;
let reader = Reader::new(file_embl);
let mut records = reader.records();
let mut read_counter: u32 = 0;
Expand Down
4 changes: 2 additions & 2 deletions microBioRust/tests/embl_to_gff.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use microBioRust::embl::{gff_write, Reader, Record};
use microBioRust::embl::{Reader, Record, gff_write};
use std::collections::BTreeMap;
use std::fs;

#[test]
fn test_embl_to_gff() -> std::io::Result<()> {
let file_embl = fs::File::open("example.embl")?;
let file_embl = fs::File::open("tests/example.embl")?;
let reader = Reader::new(file_embl);
let mut records = reader.records();
let mut read_counter: u32 = 0;
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion microBioRust/tests/genbank_to_faa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use microBioRust::gbk::Reader;
use std::fs;
#[test]
pub fn genbank_to_faa() -> Result<(), anyhow::Error> {
let file_gbk = fs::File::open("K12_ribo.gbk")?;
let file_gbk = fs::File::open("tests/K12_ribo.gbk")?;
let reader = Reader::new(file_gbk);
let mut records = reader.records();
let mut read_counter: u32 = 0;
Expand Down
2 changes: 1 addition & 1 deletion microBioRust/tests/genbank_to_ffn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use microBioRust::gbk::Reader;
use std::fs;
#[test]
pub fn genbank_to_ffn() -> Result<(), anyhow::Error> {
let file_gbk = fs::File::open("K12_ribo.gbk")?;
let file_gbk = fs::File::open("tests/K12_ribo.gbk")?;
let reader = Reader::new(file_gbk);
let mut records = reader.records();
let mut read_counter: u32 = 0;
Expand Down
4 changes: 2 additions & 2 deletions microBioRust/tests/genbank_to_gff.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
use microBioRust::gbk::{gff_write, Reader, Record};
use microBioRust::gbk::{Reader, Record, gff_write};
use std::collections::BTreeMap;
use std::fs;
use std::io;
#[test]
pub fn genbank_to_gff() -> io::Result<()> {
let file_gbk = fs::File::open("K12_ribo.gbk")?;
let file_gbk = fs::File::open("tests/K12_ribo.gbk")?;
let _prev_start: u32 = 0;
let mut prev_end: u32 = 0;
let reader = Reader::new(file_gbk);
Expand Down
52 changes: 52 additions & 0 deletions microBioRust/tests/new_output_embl.gbk
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
LOCUS source_1 928 bp DNA linear CON 01-JUN-2026
DEFINITION Escherichia coli K-12 substr. MG1655.
ACCESSION source_1
KEYWORDS .
SOURCE Escherichia coli K-12 substr. MG1655
ORGANISM Escherichia coli K-12 substr. MG1655
FEATURES Location/Qualifiers
source 1..910
/organism="K-12 substr. MG1655"
/mol_type="DNA"
/strain="K-12 substr. MG1655"
/db_xref="PRJNA57779"
gene complement(1..354)
/locus_tag="b3304"
CDS complement(1..354)
/locus_tag="b3304"
/codon_start="1"
/gene="rplR"
/translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGS
LVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQAL
DAAREAGLQ"
/product="50S ribosomal subunit protein L18"
gene complement(364..897)
/locus_tag="b3305"
CDS complement(364..897)
/locus_tag="b3305"
/codon_start="1"
/gene="rplF"
/translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKH
NTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLS
GFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYAD
VVRTKEAKK"
/product="50S ribosomal subunit protein L6"
ORIGIN
1 acctctacct tagaactgaa ggccagcttc acgggcagca tctgccagtg cctggacacg
61 accatgatat tggaacccgg aacggtcaaa ggatacatct ttgatgcctt tttccagagc
121 gcgttcagcg acagctttac ccacagctgc agccgcgtct ttgttaccgg tgtacttcag
181 ttgttcagcg atagcttttt ctacagtaga agcagctacc agaacttcag aaccgttcgg
241 tgcaattacc tgtgcgtaaa tgtgacgcgg ggtacgatgt accaccaggc gagttgcgcc
301 cagctcctgg agcttgcggc gtgcgcgggt cgcacgacgg atacgagcag atttcttatc
361 catagtgtta ccttacttct tcttagcctc tttggtacgc acgacttcgt cggcgtaacg
421 aacacccttg cctttataag gctcaggacg acggtaggcg cgcagatccg ctgcaacctg
481 gccgatcacc tgcttatcag cgcctttcag cacgatttca gtctgagtcg gacattcagc
541 agtgataccc gcaggcagct gatggtcaac aggatgagag aaacccagag acaggttaat
601 cacattgcct ttaaccgctg cacggtaacc tacaccaacc agctgcagct tcttagtgaa
661 gccttcggta acaccgataa ccattgagtt cagcagggca cgcgcggtac cagcctgtgc
721 ccaaccgtct gcgtaaccat cacgcggacc gaaggtcagg gtattatctg catgtttaac
781 ttcaacagca tcgttgagag tacgagtcag ctcgccgttt ttacctttga tcgtaataac
841 ctgaccgttg atttttacgt caacgccggc aggaacaacg accggtgctt tagcaacacg
901 agacattttt tcc

//
Loading
Loading