diff --git a/.github/workflows/conventional-prs.yml b/.github/workflows/conventional-prs.yml index 82028b7..d0f5164 100644 --- a/.github/workflows/conventional-prs.yml +++ b/.github/workflows/conventional-prs.yml @@ -1,4 +1,5 @@ -name: Lint PR +name: Conventional PRs + on: pull_request_target: types: @@ -11,10 +12,5 @@ permissions: pull-requests: read jobs: - main: - name: Validate PR title - runs-on: ubuntu-latest - steps: - - uses: amannn/action-semantic-pull-request@v5 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + conventional-prs: + uses: MPUSP/mpusp-github-actions/.github/workflows/conventional-prs.yml@main diff --git a/.github/workflows/deploy-apptainer.yml b/.github/workflows/deploy-apptainer.yml new file mode 100644 index 0000000..aa93cbc --- /dev/null +++ b/.github/workflows/deploy-apptainer.yml @@ -0,0 +1,17 @@ +name: Deploy Apptainer + +on: + workflow_run: + workflows: ["Release Please"] + types: + - completed + workflow_dispatch: + +permissions: + contents: read + packages: write + +jobs: + deploy-apptainer: + if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }} + uses: MPUSP/mpusp-github-actions/.github/workflows/deploy-apptainer.yml@main diff --git a/.github/workflows/deploy_apptainer.yml b/.github/workflows/deploy_apptainer.yml deleted file mode 100644 index e7b8081..0000000 --- a/.github/workflows/deploy_apptainer.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: Deploy Apptainer - -on: - workflow_run: - workflows: ["release-please"] - types: - - completed - workflow_dispatch: - -jobs: - build_and_push: - runs-on: ubuntu-latest - permissions: - contents: read - packages: write - if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }} - steps: - - name: checkout repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: add apptainer source - shell: bash - run: | - sudo add-apt-repository -y ppa:apptainer/ppa - sudo apt-get update - - - 
name: create dockerfile - uses: snakemake/snakemake-github-action@v2 - with: - directory: . - snakefile: workflow/Snakefile - install-apptainer: true - args: "--cores 1" - task: containerize - - - name: create apptainer recipe - shell: bash - run: | - pip install spython - sed -i "2i RUN apt-get update && apt-get install -y curl" Dockerfile - spython recipe Dockerfile > apptainer.def - sed -i 's/\/environment.yaml\/environment.yaml$/\/environment.yaml/' apptainer.def - - - name: create apptainer image - shell: bash - run: | - sudo apt-get install -y uidmap - apptainer build --fakeroot apptainer.sif apptainer.def - - - name: authenticate to GHCR - run: | - echo ${{ secrets.GITHUB_TOKEN }} | apptainer registry login -u ${{ github.actor }} --password-stdin oras://ghcr.io - - - name: push apptainer to GHCR - run: | - REPO=$(echo "${{ github.repository }}" | tr '[:upper:]' '[:lower:]') - echo "Pushing apptainer to: oras://ghcr.io/${REPO}:latest" - apptainer push apptainer.sif "oras://ghcr.io/${REPO}:latest" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index f1d5e40..0000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: CI - -on: - push: - branches: [main] - pull_request: - branches: [main] - -jobs: - Formatting: - runs-on: ubuntu-latest - if: ${{ github.actor != 'github-actions[bot]' }} - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Test formatting - uses: super-linter/super-linter@v7 - env: - VALIDATE_ALL_CODEBASE: false - DEFAULT_BRANCH: main - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - VALIDATE_SNAKEMAKE_SNAKEFMT: true - VALIDATE_YAML_PRETTIER: true - - Linting: - runs-on: ubuntu-latest - if: ${{ github.actor != 'github-actions[bot]' }} - steps: - - uses: actions/checkout@v4 - - name: Test linting workflow - uses: snakemake/snakemake-github-action@v2.0.0 - with: - directory: . 
- snakefile: workflow/Snakefile - args: "--lint" - - Testing: - runs-on: ubuntu-latest - if: ${{ github.actor != 'github-actions[bot]' }} - needs: - - Formatting - steps: - - uses: actions/checkout@v4 - - name: Test run workflow - uses: snakemake/snakemake-github-action@v2.0.0 - with: - directory: .test - snakefile: workflow/Snakefile - args: "--sdm conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache" - - - name: Test report - uses: snakemake/snakemake-github-action@v2.0.0 - with: - directory: .test - snakefile: workflow/Snakefile - args: "--cores 1 --report report.zip -n" diff --git a/.github/workflows/release-please.yml b/.github/workflows/release-please.yml index 78dcfea..b103aa0 100644 --- a/.github/workflows/release-please.yml +++ b/.github/workflows/release-please.yml @@ -1,20 +1,14 @@ +name: Release Please + on: push: - branches: - - main + branches: [main] permissions: contents: write pull-requests: write issues: write -name: release-please - jobs: release-please: - runs-on: ubuntu-latest - steps: - - uses: googleapis/release-please-action@v4 - with: - token: ${{ secrets.GITHUB_TOKEN }} - release-type: simple + uses: MPUSP/mpusp-github-actions/.github/workflows/release-please.yml@main diff --git a/.github/workflows/snakemake-tests.yml b/.github/workflows/snakemake-tests.yml new file mode 100644 index 0000000..7e58b06 --- /dev/null +++ b/.github/workflows/snakemake-tests.yml @@ -0,0 +1,12 @@ +name: Snakemake Tests + +on: + pull_request: + branches: [main] + +jobs: + snakemake-tests: + uses: MPUSP/mpusp-github-actions/.github/workflows/snakemake-tests.yml@main + with: + cores: 2 + dryrun: false diff --git a/.test/config/config.yml b/.test/config/config.yml index 773cadc..dd50d27 100644 --- a/.test/config/config.yml +++ b/.test/config/config.yml @@ -1,6 +1,10 @@ samplesheet: "config/samples.csv" tool: ["prokka"] +reference: + fasta: "" + gff: "" + pgap: bin: "path/to/pgap.py" use_yaml_config: True @@ -18,8 +22,6 @@ bakta: extra: 
"--keep-contig-headers --compliant" quast: - reference_fasta: "" - reference_gff: "" extra: "" panaroo: @@ -27,3 +29,7 @@ panaroo: remove_source: "cmsearch" remove_feature: "tRNA|rRNA|ncRNA|exon|sequence_feature" extra: "--clean-mode strict --remove-invalid-genes" + +fastani: + skip: False + extra: "" diff --git a/README.md b/README.md index 1368e45..def7e9b 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # snakemake-assembly-postprocessing [![Snakemake](https://img.shields.io/badge/snakemake-≥8.24.1-brightgreen.svg)](https://snakemake.github.io) -[![GitHub actions status](https://github.com/MPUSP/snakemake-assembly-postprocessing/actions/workflows/main.yml/badge.svg)](https://github.com/MPUSP/snakemake-assembly-postprocessing/actions/workflows/main.yml) +[![GitHub Actions](https://github.com/MPUSP/snakemake-assembly-postprocessing/actions/workflows/snakemake-tests.yml/badge.svg)](https://github.com/MPUSP/snakemake-assembly-postprocessing/actions/workflows/snakemake-tests.yml) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with apptainer](https://img.shields.io/badge/run%20with-apptainer-1D355C.svg?labelColor=000000)](https://apptainer.org/) [![workflow catalog](https://img.shields.io/badge/Snakemake%20workflow%20catalog-darkgreen)](https://snakemake.github.io/snakemake-workflow-catalog/docs/workflows/MPUSP/snakemake-assembly-postprocessing) diff --git a/config/README.md b/config/README.md index d864e26..43c77d8 100644 --- a/config/README.md +++ b/config/README.md @@ -9,6 +9,7 @@ A Snakemake workflow for the post-processing of microbial genome assemblies. 3. [bakta](https://github.com/oschwengers/bakta), a fast, alignment-free annotation tool. Note: Bakta will automatically download its companion database from zenodo (light: 1.5 GB, full: 40 GB) 3. Create a QC report for the assemblies using [Quast](https://github.com/ablab/quast) 4. 
Create a pangenome analysis (orthologs/homologs) using [Panaroo](https://gthlab.au/panaroo/) +5. Compute pairwise average nucleotide identity (ANI) between the assemblies using [FastANI](https://github.com/ParBLiSS/FastANI) and plot a phylogenetic tree based on the ANI distances. ## Running the workflow @@ -22,34 +23,4 @@ The samplesheet table has the following layout: | EC2224 | "Streptococcus pyogenes" | SF370 | SPY | assembly.fasta | | ... | ... | ... | ... | ... | -**Note:** Pangenome analysis with `Panaroo` requires at least two samples. - -### Parameters - -This table lists all parameters that can be used to run the workflow. - -| Parameter | Type | Details | Default | -|:---|:---|:---|:---| -| **samplesheet** | string | Path to the sample sheet file in csv format | | -| **tool** | array[string] | Annotation tool to use (one of `prokka`, `pgap`, `bakta`) | | -| **pgap** | | PGAP configuration object | | -| bin | string | Path to the PGAP script | | -| use_yaml_config | boolean | Whether to use YAML configuration for PGAP | `False` | -| _prepare_yaml_files_ | | Paths to YAML templates for PGAP | | -| generic | string | Path to the generic YAML configuration file | | -| submol | string | Path to the submol YAML configuration file | | -| **prokka** | | Prokka configuration object | | -| center | string | Center name for Prokka annotation (used in sequence IDs) | | -| extra | string | Extra command-line arguments for Prokka | `--addgenes` | -| **bakta** | | Bakta configuration object | | -| download_db | string | Bakta database type (`full`, `light`, or `none`) | `light` | -| existing_db | string | Path to an existing Bakta database (optional). 
Needs to be combined with `download_db='none'` | `--keep-contig-headers --compliant` | -| extra | string | Extra command-line arguments for Bakta | | -| **quast** | | QUAST configuration object | | -| reference_fasta | string | Path to the reference genome for QUAST | | -| reference_gff | string | Path to the reference annotation for QUAST | -| extra | string | Extra command-line arguments for QUAST | | -| **panaroo** | | Panaroo configuration object | | -| remove_source | string | Source types to remove in Panaroo (regex supported) | `cmsearch` | -| remove_feature | string | Feature types to remove in Panaroo (regex supported) | `tRNA\|rRNA\|ncRNA\|exon\|sequence_feature` | -| extra | string | Extra command-line arguments for Panaroo | `--clean-mode strict --remove-invalid-genes` | +**Note:** Pangenome analysis with `Panaroo` and pairwise similarity analysis with `FastANI` requires at least two samples. diff --git a/config/config.yml b/config/config.yml index 773cadc..dd50d27 100644 --- a/config/config.yml +++ b/config/config.yml @@ -1,6 +1,10 @@ samplesheet: "config/samples.csv" tool: ["prokka"] +reference: + fasta: "" + gff: "" + pgap: bin: "path/to/pgap.py" use_yaml_config: True @@ -18,8 +22,6 @@ bakta: extra: "--keep-contig-headers --compliant" quast: - reference_fasta: "" - reference_gff: "" extra: "" panaroo: @@ -27,3 +29,7 @@ panaroo: remove_source: "cmsearch" remove_feature: "tRNA|rRNA|ncRNA|exon|sequence_feature" extra: "--clean-mode strict --remove-invalid-genes" + +fastani: + skip: False + extra: "" diff --git a/config/schemas/config.schema.yml b/config/schemas/config.schema.yml index 33215e4..b7f32c9 100644 --- a/config/schemas/config.schema.yml +++ b/config/schemas/config.schema.yml @@ -5,34 +5,55 @@ type: object properties: samplesheet: type: string - description: Path to the sample sheet file + description: Path to the sample sheet in CSV format + default: "config/samples.csv" tool: type: array - description: Annotation tool to use + description: 
Annotation tool(s) to use + default: ["prokka"] items: type: string + description: Name of the annotation tool + default: "prokka" enum: - prokka - pgap - bakta + reference: + type: object + properties: + fasta: + type: string + description: Path to the reference genome in FASTA format + default: "" + gff: + type: string + description: Path to the reference annotation in GFF format (optional) + default: "" + required: + - fasta pgap: type: object properties: bin: type: string description: Path to the PGAP script + default: "path/to/pgap.py" use_yaml_config: type: boolean description: Whether to use YAML configuration for PGAP + default: true prepare_yaml_files: type: object properties: generic: type: string description: Path to the generic YAML configuration file + default: "config/generic.yaml" submol: type: string description: Path to the submol YAML configuration file + default: "config/submol.yaml" required: - generic - submol @@ -46,9 +67,11 @@ properties: center: type: string description: Center name for Prokka annotation (used in sequence IDs) + default: "" extra: type: string description: Extra command-line arguments for Prokka + default: "--addgenes" required: - center - extra @@ -58,12 +81,15 @@ properties: download_db: type: string description: Bakta database type, one of 'full', 'light', or 'none' if existing is used + default: "light" existing_db: type: string description: Path to an existing Bakta database (optional) + default: "" extra: type: string description: Extra command-line arguments for Bakta + default: "--keep-contig-headers --compliant" required: - download_db - existing_db @@ -71,35 +97,49 @@ properties: quast: type: object properties: - reference_fasta: - type: string - description: Path to the reference genome for QUAST - reference_gff: - type: string - description: Path to the reference annotation for QUAST extra: type: string description: Extra command-line arguments for QUAST + default: "" + required: + - extra panaroo: type: object 
properties: skip: type: boolean description: Whether to skip Panaroo analysis + default: false remove_source: type: string description: Source types to remove in Panaroo (regex supported) + default: "cmsearch" remove_feature: type: string description: Feature types to remove in Panaroo (regex supported) + default: "tRNA|rRNA|ncRNA|exon|sequence_feature" extra: type: string description: Extra command-line arguments for Panaroo - + default: "--clean-mode strict --remove-invalid-genes" + fastani: + type: object + properties: + skip: + type: boolean + description: Whether to skip FastANI analysis + default: false + extra: + type: string + description: Extra command-line arguments for FastANI + default: "" required: - samplesheet - tool + - reference - pgap - prokka - bakta - quast + - panaroo + - fastani diff --git a/workflow/Snakefile b/workflow/Snakefile index aafcb6a..4acfad8 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -66,6 +66,6 @@ onerror: # target rules # ----------------------------------------------------- rule all: + default_target: True input: get_final_input, - default_target: True diff --git a/workflow/envs/fastani.yml b/workflow/envs/fastani.yml new file mode 100644 index 0000000..7104421 --- /dev/null +++ b/workflow/envs/fastani.yml @@ -0,0 +1,7 @@ +name: fastani +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - fastani=1.34 diff --git a/workflow/envs/panaroo.yml b/workflow/envs/panaroo.yml index 43ee14a..b423d91 100644 --- a/workflow/envs/panaroo.yml +++ b/workflow/envs/panaroo.yml @@ -6,4 +6,5 @@ channels: dependencies: - numpy=1.26.4 - scipy=1.11.4 - - panaroo=1.5.2 + - biopython=1.84 + - panaroo=1.6.0 diff --git a/workflow/rules/annotate.smk b/workflow/rules/annotate.smk index 4b9aab0..e52b7f7 100644 --- a/workflow/rules/annotate.smk +++ b/workflow/rules/annotate.smk @@ -3,10 +3,10 @@ rule get_fasta: get_fasta, output: fasta="results/annotation/pgap/prepare_files/{sample}/genome.fasta", - conda: -
"../envs/base.yml" log: "results/annotation/pgap/prepare_files/logs/{sample}_get_fasta.log", + conda: + "../envs/base.yml" shell: "INPUT=$(realpath {input}); " "ln -s ${{INPUT}} {output}; " @@ -19,6 +19,8 @@ rule prepare_yaml_files: output: input_yaml="results/annotation/pgap/prepare_files/{sample}/input.yaml", submol_yaml="results/annotation/pgap/prepare_files/{sample}/submol.yaml", + log: + "results/annotation/pgap/prepare_files/logs/{sample}_prepare_yaml_files.log", conda: "../envs/base.yml" params: @@ -28,8 +30,6 @@ rule prepare_yaml_files: submol=config["pgap"]["prepare_yaml_files"]["submol"], sample="{sample}", pd_samples=samples, - log: - "results/annotation/pgap/prepare_files/logs/{sample}_prepare_yaml_files.log", script: "../scripts/prepare_yaml_files.py" @@ -44,18 +44,17 @@ rule annotate_pgap: output: gff="results/annotation/pgap/{sample}/{sample}.gff", fasta="results/annotation/pgap/{sample}/{sample}.fna", + log: + "results/annotation/pgap/logs/{sample}_pgap.log", conda: "../envs/base.yml" - message: - """--- Running PGAP annotation for sample {wildcards.sample} ---""" params: pgap=config["pgap"]["bin"], use_yaml_config=config["pgap"]["use_yaml_config"], species=lambda wc: samples.loc[wc.sample]["species"], outdir=lambda wc, output: os.path.dirname(output[0]), - threads: 1 - log: - "results/annotation/pgap/logs/{sample}_pgap.log", + message: + """--- Running PGAP annotation for sample {wildcards.sample} ---""" shell: "rm -rf {params.outdir}; " "if [ {params.use_yaml_config} == 'True' ]; then " @@ -83,10 +82,11 @@ rule annotate_prokka: output: gff="results/annotation/prokka/{sample}/{sample}.gff", fasta="results/annotation/prokka/{sample}/{sample}.fna", + log: + "results/annotation/prokka/logs/{sample}_prokka.log", conda: "../envs/prokka.yml" - message: - """--- Running PROKKA annotation for sample {wildcards.sample} ---""" + threads: max(workflow.cores * 0.25, 1) params: prefix=lambda wc: wc.sample, locustag=lambda wc: 
samples.loc[wc.sample]["id_prefix"], @@ -95,9 +95,8 @@ rule annotate_prokka: strain=lambda wc: samples.loc[wc.sample]["strain"], outdir=lambda wc, output: os.path.dirname(output[0]), extra=config["prokka"]["extra"], - threads: workflow.cores * 0.25 - log: - "results/annotation/prokka/logs/{sample}_prokka.log", + message: + """--- Running PROKKA annotation for sample {wildcards.sample} ---""" shell: """ prokka \ @@ -123,17 +122,17 @@ rule get_bakta_db: "none": directory("results/annotation/bakta/database/custom"), }, ), + log: + "results/annotation/bakta/database/db.log", conda: "../envs/bakta.yml" - message: - """--- Getting BAKTA database for annotation ---""" + threads: max(workflow.cores * 0.25, 1) params: download_db=config["bakta"]["download_db"], existing_db=config["bakta"]["existing_db"], outdir=lambda wc, output: os.path.dirname(output[0]), - threads: workflow.cores * 0.25 - log: - "results/annotation/bakta/database/db.log", + message: + """--- Getting BAKTA database for annotation ---""" shell: """ if [ {params.download_db} != 'none' ]; then @@ -156,10 +155,11 @@ rule annotate_bakta: output: gff="results/annotation/bakta/{sample}/{sample}.gff", fasta="results/annotation/bakta/{sample}/{sample}.fna", + log: + "results/annotation/bakta/logs/{sample}_bakta.log", conda: "../envs/bakta.yml" - message: - """--- Running BAKTA annotation for sample {wildcards.sample} ---""" + threads: max(workflow.cores * 0.25, 1) params: prefix=lambda wc: wc.sample, locustag=lambda wc: format_bakta_locustag(samples.loc[wc.sample]["id_prefix"]), @@ -167,9 +167,8 @@ rule annotate_bakta: strain=lambda wc: samples.loc[wc.sample]["strain"], outdir=lambda wc, output: os.path.dirname(output[0]), extra=config["bakta"]["extra"], - threads: workflow.cores * 0.25 - log: - "results/annotation/bakta/logs/{sample}_bakta.log", + message: + """--- Running BAKTA annotation for sample {wildcards.sample} ---""" shell: """ bakta \ diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk 
index 47364a7..26f2d8d 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -4,7 +4,6 @@ import re from snakemake import logging from snakemake.utils import validate - # read sample sheet samples = ( pd.read_csv(config["samplesheet"], sep=",", dtype={"sample": str}) @@ -28,12 +27,9 @@ def get_fasta(wildcards): return samples.loc[sample, "file"] -def get_quast_fasta(wildcards): - return expand( - "results/annotation/{tool}/{sample}/{sample}.fna", - tool=wildcards.tool, - sample=samples.index, - ) +def get_all_fasta(wildcards): + """Get all input fasta files for all samples.""" + return [samples.loc[s, "file"] for s in samples.index] def get_panaroo_gff(wildcards): @@ -55,14 +51,17 @@ def get_panaroo_fasta(wildcards): def get_final_input(wildcards): inputs = [] inputs += expand( - "results/qc/quast/{tool}/report.txt", - tool=config["tool"], + "results/qc/quast/report.txt", ) if len(samples.index) > 1 and not config["panaroo"]["skip"]: inputs += expand( "results/qc/panaroo/{tool}/summary_statistics.txt", tool=config["tool"], ) + if len(samples.index) > 1 and not config["fastani"]["skip"]: + inputs += expand( + "results/qc/fastani/summary.txt", + ) return inputs diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index 6135aa1..c146e26 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -1,28 +1,28 @@ rule quast: input: - fasta=get_quast_fasta, + fasta=get_all_fasta, output: - report="results/qc/quast/{tool}/report.txt", + report="results/qc/quast/report.txt", + log: + "results/qc/quast/quast.log", conda: "../envs/quast.yml" - message: - """--- Running QUAST quality check for all assemblies ---""" + threads: max(workflow.cores * 0.5, 1) params: outdir=lambda wc, output: os.path.dirname(output.report), ref_fasta=( - " ".join(["-r", config["quast"]["reference_fasta"]]) - if config["quast"]["reference_fasta"] + " ".join(["-r", config["reference"]["fasta"]]) + if config["reference"]["fasta"] else [] ), ref_gff=( - " ".join(["-g", 
config["quast"]["reference_gff"]]) - if config["quast"]["reference_gff"] + " ".join(["-g", config["reference"]["gff"]]) + if config["reference"]["gff"] else [] ), extra=config["quast"]["extra"], - threads: 4 - log: - "results/qc/quast/{tool}/quast.log", + message: + """--- Running QUAST quality check for all assemblies ---""" shell: """ quast \ @@ -36,6 +36,38 @@ rule quast: """ +rule fastani: + input: + fasta=get_all_fasta, + output: + txt="results/qc/fastani/summary.txt", + log: + "results/qc/fastani/fastani.log", + conda: + "../envs/fastani.yml" + threads: max(workflow.cores * 0.5, 1) + params: + outdir=lambda wc, output: os.path.dirname(output.txt), + ref_fasta=( + [config["reference"]["fasta"]] if config["reference"]["fasta"] else [] + ), + extra=config["fastani"]["extra"], + message: + """--- Running FastANI to compare genome similarity (all vs all) ---""" + shell: + """ + printf '%s\n' {input.fasta} > {params.outdir}/input_files.txt; + printf '%s\n' {params.ref_fasta} >> {params.outdir}/input_files.txt; + fastANI \ + --ql {params.outdir}/input_files.txt \ + --rl {params.outdir}/input_files.txt \ + --output {output.txt} \ + --threads {threads} \ + {params.extra} \ + > {log} 2>&1 + """ + + rule prepare_panaroo: input: fasta="results/annotation/{tool}/{sample}/{sample}.fna", @@ -43,15 +75,15 @@ rule prepare_panaroo: output: fasta="results/qc/panaroo/{tool}/prepare/{sample}.fna", gff="results/qc/panaroo/{tool}/prepare/{sample}.gff", + log: + "results/qc/panaroo/{tool}/prepare/{sample}.log", conda: "../envs/panaroo.yml" - message: - """--- Prepare input files for pan-genome alignment ---""" params: remove_source=config["panaroo"]["remove_source"], remove_feature=config["panaroo"]["remove_feature"], - log: - "results/qc/panaroo/{tool}/prepare/{sample}.log", + message: + """--- Prepare input files for pan-genome alignment ---""" shell: """ echo 'Preparing annotation for Panaroo:' > {log}; @@ -70,16 +102,16 @@ rule panaroo: fasta=get_panaroo_fasta, output: 
stats="results/qc/panaroo/{tool}/summary_statistics.txt", + log: + "results/qc/panaroo/{tool}/panaroo.log", conda: "../envs/panaroo.yml" - message: - """--- Running PANAROO to create pangenome from all annotations ---""" + threads: max(workflow.cores * 0.5, 1) params: outdir=lambda wc, output: os.path.dirname(output.stats), extra=config["panaroo"]["extra"], - threads: 4 - log: - "results/qc/panaroo/{tool}/panaroo.log", + message: + """--- Running PANAROO to create pangenome from all annotations ---""" shell: """ printf '%s\n' {input.gff} | \