From 6804755bd2d0f53f465266885c9751ef78bb8a1a Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Tue, 3 Dec 2024 18:31:07 +0100 Subject: [PATCH 01/33] feat: bowtie2 can now sort with samtools and picard --- .../align/environment.linux-64.pin.txt | 87 ++++++++--- bio/bowtie2/align/environment.yaml | 1 + bio/bowtie2/align/meta.yaml | 5 + bio/bowtie2/align/test/Snakefile | 40 +++++ bio/bowtie2/align/wrapper.py | 146 ++++++++++++++---- test_wrappers.py | 31 +++- 6 files changed, 250 insertions(+), 60 deletions(-) diff --git a/bio/bowtie2/align/environment.linux-64.pin.txt b/bio/bowtie2/align/environment.linux-64.pin.txt index f60ee3221ab..fc5ce3164ab 100644 --- a/bio/bowtie2/align/environment.linux-64.pin.txt +++ b/bio/bowtie2/align/environment.linux-64.pin.txt @@ -1,46 +1,89 @@ # This file may be used to create an environment using: # $ conda create --name --file # platform: linux-64 +# created-by: conda 24.9.2 @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.8.30-hbcca054_0.conda#c27d1c142233b5bc9ca570c6e2e0c244 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-hf3520f5_7.conda#b80f2f396ca2c28b8c14c437a4ed1e74 -https://conda.anaconda.org/conda-forge/noarch/tzdata-2024a-h8827d51_1.conda#8bfdead4e0fff0383ae4c9c50d0531bd -https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.1.0-h77fa898_1.conda#23c255b008c4f2ae008f81edcabaca89 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 +https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb +https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-h77eed37_3.conda#49023d73832ef61042f6a237cb2687e7 +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_2.conda#048b02e3962f066da18efe3a21b77672 +https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.13-5_cp313.conda#381bbd2a92c863f640a55b6ff3c35161 +https://conda.anaconda.org/conda-forge/noarch/tzdata-2024b-hc8b5060_0.conda#8ac3367aafb1cc0a068483c580af8015 +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 +https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.2.0-h77fa898_1.conda#cc3573974587f12dda90d96e3e55a702 https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d -https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.1.0-h77fa898_1.conda#002ef4463dd1e2b44a94a4ace468f5d2 -https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.3-h5888daf_0.conda#59f4c43bb1b5ef1c71946ff2cbf59524 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.1.0-h69a702a_1.conda#1efc0ad219877a73ef977af7dbb51f17 -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.1.0-hc0a3c3a_1.conda#9dbb9699ea467983ba8a4ba89b08b066 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.3.2-hb9d3cd8_0.conda#4d638782050ab6faa27275bed57e9b4e +https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab +https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.2.0-h77fa898_1.conda#3cb76c3f10d3bc7f1105b2fc9db984df +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.13-hb9d3cd8_0.conda#ae1370588aa6a5157c34c73e9bbb36a0 +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.3-hb9d3cd8_1.conda#ee228789a85f961d14567252a03e725f +https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.4-h5888daf_0.conda#db833e03127376d461e1e13e76f09b6c +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_1.conda#e39480b9ca41323497b05492a63bc35b +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-hc0a3c3a_1.conda#234a5554c53625688d51062645337328 +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.4.0-hb9d3cd8_0.conda#23cc74f77eb99315c0360ec3533147a9 +https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda#b3c17d95b5a10c6e64a21fa17573e70e +https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hb9d3cd8_1.conda#19608a9656912805b2b9a2f6bd257b04 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hb9d3cd8_1.conda#77cbc488235ebbaab2b6e912d3934bae +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.5-hb9d3cd8_0.conda#8035c64cb77ed555e3f150b7b3972480 +https://conda.anaconda.org/conda-forge/linux-64/xorg-xorgproto-2024.1-hb9d3cd8_1.conda#7c21106b851ec72c037b162c216d8f05 https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553 -https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.33.1-heb4867d_0.conda#0d3c60291342c0c025db231353376dfb +https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.2-hd590300_0.conda#3bf7b9fd5a7136126e0234db4b87c8b6 https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.20-hd590300_0.conda#8e88f9389f1165d7c0936fe40d9a9a79 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.21-h4bc722e_0.conda#36ce76665bf67f5aac36be7a0d21b7f3 https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055 https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 -https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7 -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.1.0-h4852527_1.conda#bd2598399a70bb86d8218e95548d735e +https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 +https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-h4bc722e_0.conda#aeb98fdeb2e8f25d43ef71fbacbeec80 +https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.44-hadc24fc_0.conda#f4cc49d7aa68316213e4b12be35308d1 +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.47.0-hadc24fc_1.conda#b6f02b52a174e612e89548f4663ce56a +https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.1-hf672d98_0.conda#be2de152d8073ef1c01b7728475f2fe7 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.2.0-h4852527_1.conda#8371ac6457591af2cf6159439c1fd051 https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.conda#b26e8aa824079e1be0294e7152ca4559 +https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda#92ed62436b625154323d40d5f2f11dd7 https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-h4ab18f5_1.conda#57d7dc60e9325e3de37ff8dffd18e814 https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-he02047a_1.conda#70caf8bb6cf39a0b6b7efc885f51c0fe +https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-hb9d3cd8_2.conda#c9f075ab2f33b3bbee9e62d4ad0a6cd8 +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb +https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c +https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3 +https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 -https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.58.0-h47da74e_1.conda#700ac6ea6d53d5510591c4344d5c989a -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.46.1-hadc24fc_0.conda#36f79405ab16bf271edb55b213836dac -https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe +https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.64.0-h161d5f1_0.conda#19e57602824042dfd0446292ef90488b +https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.44-hba22ea6_2.conda#df359c09c41cd186fffb93a2d87aa6f5 https://conda.anaconda.org/conda-forge/linux-64/perl-5.32.1-7_hd590300_perl5.conda#f2cfec9406850991f4e3d960cc9e3321 +https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123 https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 -https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc +https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-he73a12e_1.conda#05a8ea5f446de33006171a7afe6ae857 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.10-h4f16b4b_0.conda#0b666058a179b744a622d0a4a0c56353 https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45 +https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.15.0-h7e30c49_1.conda#8f5b0b297b59e1ac160ad4beec99dbee https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368 -https://conda.anaconda.org/conda-forge/linux-64/python-3.12.5-h2ad013b_0_cpython.conda#9c56c4df45f6571b13111d8df2448692 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.82.2-h2ff4ddf_0.conda#13e8e54035ddd2b91875ba399f0f7c04 +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.0-h6565414_0.conda#80eaf80d84668fa5620ac9ec1b4bf56f +https://conda.anaconda.org/conda-forge/linux-64/python-3.13.0-h9ebbce0_101_cp313.conda#f4fea9d5bb3f2e61a39950a7ab70ee4e +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.6-hb9d3cd8_0.conda#febbab7d15033c913d53c7a2c102309d +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxfixes-6.0.1-hb9d3cd8_0.conda#4bdb303603e9821baf5fe5fdff1dc8f8 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hb9d3cd8_1.conda#a7a49a8b85122b49214798321e2e96b4 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxt-1.3.1-hb9d3cd8_0.conda#279b0de5f6ba95457190a1c459a64e31 https://conda.anaconda.org/bioconda/linux-64/bowtie2-2.5.4-h7071971_4.tar.bz2#69822858766e6c8b12ae90d78d54d8ea -https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.10.0-hbbe4b11_0.conda#657ea309ad90675ef144e7d27a271ab9 -https://conda.anaconda.org/conda-forge/noarch/setuptools-73.0.1-pyhd8ed1ab_0.conda#f0b618d7673d1b2464f600b34d912f6f +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-hebfffa5_3.conda#fceaedf1cdbcb02df9699a0d9b005292 +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 +https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 +https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.10.1-hbbe4b11_0.conda#6e801c50a40301f6978c53976917b277 +https://conda.anaconda.org/conda-forge/noarch/pip-24.3.1-pyh145f28c_0.conda#ca3afe2d7b893a8c8cdf489d30a2b1a3 https://conda.anaconda.org/bioconda/noarch/snakemake-wrapper-utils-0.6.2-pyhdfd78af_0.tar.bz2#fd8759bbd04116eace828c4fab906096 -https://conda.anaconda.org/conda-forge/noarch/wheel-0.44.0-pyhd8ed1ab_0.conda#d44e3b085abcaef02983c6305b84b584 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxi-1.8.2-hb9d3cd8_0.conda#17dcc85db3c7886650b8908b183d6876 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrandr-1.5.4-hb9d3cd8_0.conda#2de7f99d6581a4a7adbff607b5c278ca +https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-9.0.0-hda332d3_1.conda#76b32dcf243444aea9c6b804bcfa40b8 https://conda.anaconda.org/bioconda/linux-64/htslib-1.21-h5efdd21_0.tar.bz2#06b995dc2244c024b45bbb3e53ae2f27 -https://conda.anaconda.org/conda-forge/noarch/pip-24.2-pyh8b19718_1.conda#6c78fbb8ddfd64bcb55b5cbafd2d2c43 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxtst-1.2.5-hb9d3cd8_3.conda#7bbe9a0cc0df0ac5f5a8ad6d6a11af2f +https://conda.anaconda.org/conda-forge/linux-64/openjdk-23.0.1-h4c11d01_0.conda#c40dda22ec391102c2bc24dd92f1f663 https://conda.anaconda.org/bioconda/linux-64/samtools-1.21-h50ea8bc_0.tar.bz2#4a7fe11223f61cb2d950ed54e20c12ce +https://conda.anaconda.org/bioconda/noarch/picard-slim-3.3.0-hdfd78af_0.tar.bz2#fab4a4639fd22cd155b740fce3064944 diff --git a/bio/bowtie2/align/environment.yaml b/bio/bowtie2/align/environment.yaml index d961b36c609..b08cb1424d6 100644 --- a/bio/bowtie2/align/environment.yaml +++ b/bio/bowtie2/align/environment.yaml @@ -5,4 +5,5 @@ channels: dependencies: - bowtie2 =2.5.4 - samtools =1.21 + - picard-slim =3.3.0 - snakemake-wrapper-utils =0.6.2 diff --git a/bio/bowtie2/align/meta.yaml b/bio/bowtie2/align/meta.yaml index 8e7ad69b9ad..24f5751ff4c 100644 --- a/bio/bowtie2/align/meta.yaml +++ b/bio/bowtie2/align/meta.yaml @@ -21,5 +21,10 @@ output: params: - extra: additional program arguments (except for `-x`, `-U`, `-1`, `-2`, `--interleaved`, `-b`, `--met-file`, `--un`, `--al`, `--un-conc`, `--al-conc`, `-f`, `--tab6`, `--tab5`, `-q`, or `-p/--threads`) - interleaved: Input `sample` contains interleaved paired-end FASTQ/FASTA reads. `False`(default) or `True`. + - sort_program: program to sort the output. `none`(default), `samtools`, or `picard`. + - sort_extra: additional arguments for samtools or picard when sorting. + - sort_order: choose between `coordinate`(default) or `queryname`. notes: | + * The `extra` param allows for additional arguments for bowtie2. + * The `sort` param allows to enable sorting, and can be either 'none', 'samtools' or 'picard'. * This wrapper uses an inner pipe. Make sure to use at least two threads in your Snakefile. diff --git a/bio/bowtie2/align/test/Snakefile b/bio/bowtie2/align/test/Snakefile index d820dd8bfb0..3555d6cd43b 100644 --- a/bio/bowtie2/align/test/Snakefile +++ b/bio/bowtie2/align/test/Snakefile @@ -101,3 +101,43 @@ rule test_bowtie2_cram: threads: 8 # Use at least two threads wrapper: "master/bio/bowtie2/align" + + +for order in ["coordinate", "queryname"]: + for extension in ["sam", "bam", "cram"]: + for program in ["none", "samtools", "picard"]: + + rule: + name: f"bowtie2_align_{program}_{order}_{extension}" + input: + sample = ["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], + idx = multiext( + "index/genome", + ".1.bt2", + ".2.bt2", + ".3.bt2", + ".4.bt2", + ".rev.1.bt2", + ".rev.2.bt2", + ), + ref="genome.fasta", + output: + f"mapped_idx/{{sample}}.{program}_{order}.{extension}", + # idx="", + # metrics="", + # unaligned="", + # unpaired="", + # unconcordant="", + # concordant="", + params: + sort_program=program, + sort_order=order, + sort_extra="", + extra="", + log: + f"logs/bowtie2/{{sample}}.{program}_{order}_{extension}.log", + params: + extra="", # optional parameters + threads: 8 # Use at least two threads + wrapper: + "master/bio/bowtie2/align" diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index cd8b4d64f02..784ca16959e 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -4,10 +4,106 @@ __license__ = "MIT" -import os +import tempfile +from os import path from snakemake.shell import shell +from snakemake_wrapper_utils.java import get_java_opts from snakemake_wrapper_utils.samtools import get_samtools_opts +# All idx required by bowtie2 +REQUIRED_IDX = {".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2", ".rev.1.bt2", ".rev.2.bt2"} + + +# Extract arguments. +extra = snakemake.params.get("extra", "") +log = snakemake.log_fmt_shell(stdout=False, stderr=True) +sort_program = snakemake.params.get("sort_program", "none") +sort_order = snakemake.params.get("sort_order", "coordinate") +sort_extra = snakemake.params.get("sort_extra", "") +samtools_opts = get_samtools_opts( + snakemake, parse_threads=False, param_name="sort_extra" +) +java_opts = get_java_opts(snakemake) + +bowtie2_threads = snakemake.threads +samtools_threads = snakemake.threads - 1 + + +# Extract index from input +# and check that all required indices are declared +index = path.commonprefix(snakemake.input.idx)[:-1] + +if len(index) == 0: + raise ValueError("Could not determine common prefix of inputs.idx files.") + +index_extensions = [idx[len(index) :] for idx in snakemake.input.idx] +missing_idx = REQUIRED_IDX - set(index_extensions) +if len(missing_idx) > 0: + raise ValueError( + f"Missing required indices: {missing_idx} declarad as input.idx.\n" + f"Identified reference file is {index} with extensions {index_extensions}" + ) + + +# Check inputs/arguments. +if not isinstance(snakemake.input.sample, str) and len(snakemake.input.sample) not in { + 1, + 2, +}: + raise ValueError("input must have 1 (single-end) or 2 (paired-end) elements") + +if sort_order not in {"coordinate", "queryname"}: + raise ValueError(f"Unexpected value for sort_order ({sort_order})") + + +# Determine which pipe command to use for converting to bam or sorting. +if sort_program == "none": + # Correctly assign number of threads according to user request + if samtools_threads >= 1: + samtools_opts += f" --threads {samtools_threads} " + + if str(snakemake.output[0]).lower().endswith(("bam", "cram")): + # Simply convert to bam using samtools view. + pipe_cmd = " | samtools view {samtools_opts} > {snakemake.output[0]}" + else: + # Do not perform any sort nor compression, output raw sam + pipe_cmd = " > {snakemake.output[0]} " + + +elif sort_program == "samtools": + # Correctly assign number of threads according to user request + if samtools_threads >= 1: + samtools_opts += f" --threads {samtools_threads} " + + # Add name flag if needed. + if sort_order == "queryname": + sort_extra += " -n" + + # Sort alignments using samtools sort. + pipe_cmd = " | samtools sort {samtools_opts} {sort_extra} -T {tmpdir} > {snakemake.output[0]}" + +elif sort_program == "picard": + # Correctly assign number of threads according to user request + bowtie2_threads = bowtie2_threads - 1 + if bowtie2_threads <= 0: + raise ValueError( + "Not enough threads requested. This wrapper requires exactly one more." + ) + + # Sort alignments using picard SortSam. + pipe_cmd = ( + " | picard SortSam {java_opts} {sort_extra} " + "--INPUT /dev/stdin " + "--TMP_DIR {tmpdir} " + "--SORT_ORDER {sort_order} " + "--OUTPUT {snakemake.output[0]}" + ) + +else: + raise ValueError(f"Unexpected value for params.sort ({sort})") + + + def get_format(path: str) -> str: """ @@ -19,37 +115,24 @@ def get_format(path: str) -> str: return path.split(".")[-1].lower() -bowtie2_threads = snakemake.threads - 1 -if bowtie2_threads < 1: - raise ValueError( - f"This wrapper expected at least two threads, got {snakemake.threads}" - ) - -# Setting parse_threads to false since samtools performs only -# bam compression. Thus the wrapper would use *twice* the amount -# of threads reserved by user otherwise. -samtools_opts = get_samtools_opts(snakemake, parse_threads=False) - -extra = snakemake.params.get("extra", "") -log = snakemake.log_fmt_shell(stdout=True, stderr=True) - n = len(snakemake.input.sample) assert ( n == 1 or n == 2 ), "input->sample must have 1 (single-end) or 2 (paired-end) elements." -reads = "" + +sample = "" if n == 1: if get_format(snakemake.input.sample[0]) in ("bam", "sam"): - reads = f"-b {snakemake.input.sample}" + sample = f"-b {snakemake.input.sample}" else: if snakemake.params.get("interleaved", False): - reads = f"--interleaved {snakemake.input.sample}" + sample = f"--interleaved {snakemake.input.sample}" else: - reads = f"-U {snakemake.input.sample}" + sample = f"-U {snakemake.input.sample}" else: - reads = "-1 {} -2 {}".format(*snakemake.input.sample) + sample = "-1 {} -2 {}".format(*snakemake.input.sample) if all(get_format(sample) in ("fastq", "fq") for sample in snakemake.input.sample): @@ -85,17 +168,16 @@ def get_format(path: str) -> str: extra += f" --al-conc {concordant} " -index = os.path.commonprefix(snakemake.input.idx).rstrip(".") +index = path.commonprefix(snakemake.input.idx).rstrip(".") -shell( - "(bowtie2" - " --threads {bowtie2_threads}" - " {reads} " - " -x {index}" - " {extra}" - "| samtools view --with-header " - " {samtools_opts}" - " -" - ") {log}" -) + +with tempfile.TemporaryDirectory() as tmpdir: + shell( + "( bowtie2" + " --threads {bowtie2_threads}" + " {sample} " + " -x {index}" + " {extra}" + " " + pipe_cmd + ") {log}" + ) \ No newline at end of file diff --git a/test_wrappers.py b/test_wrappers.py index e9d1e1813ca..d9c21cf7bdc 100644 --- a/test_wrappers.py +++ b/test_wrappers.py @@ -196,16 +196,15 @@ def test_nonpareil(run): ) - def test_ngsbits_samplesimilarity(run): run( "bio/ngsbits/samplesimilarity", [ - "snakemake", - "--cores", - "1", - "--use-conda", - "-F", + "snakemake", + "--cores", + "1", + "--use-conda", + "-F", "similarity.tsv", ], ) @@ -2062,6 +2061,23 @@ def test_bowtie2_align(run): ) +def test_bowtie2_align_samtools_coordinate_extension(run): + for order in ["coordinate", "queryname"]: + for extension in ["sam", "bam", "cram"]: + for program in ["none", "samtools", "picard"]: + run( + "bio/bowtie2/align", + [ + "snakemake", + "--cores", + "2", + f"mapped_idx/a.{program}_{order}.{extension}", + "--use-conda", + "-F", + ], + ) + + def test_bowtie2_build(run): run( "bio/bowtie2/build", @@ -4657,6 +4673,7 @@ def test_sexdeterrmine(run): ["snakemake", "--cores", "1", "results.tsv", "-F", "--use-conda"], ) + def test_sourmash_compute(run): run( "bio/sourmash/compute/", @@ -6009,12 +6026,14 @@ def test_vg_construct(run): ["snakemake", "--cores", "1", "graph/c.vg", "--use-conda", "-F"], ) + def test_vg_giraffe(run): run( "bio/vg/giraffe", ["snakemake", "--cores", "1", "mapped/a.bam", "--use-conda", "-F"], ) + def test_vg_merge(run): run( "bio/vg/merge", From 80998d3c8291ff056cf8e656920d3bfd27bdd83e Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Tue, 3 Dec 2024 18:33:32 +0100 Subject: [PATCH 02/33] fix: black wrapper --- bio/bowtie2/align/wrapper.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index 784ca16959e..88571ae4735 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -103,8 +103,6 @@ raise ValueError(f"Unexpected value for params.sort ({sort})") - - def get_format(path: str) -> str: """ Return file format since Bowtie2 reads files that @@ -115,7 +113,6 @@ def get_format(path: str) -> str: return path.split(".")[-1].lower() - n = len(snakemake.input.sample) assert ( n == 1 or n == 2 @@ -171,7 +168,6 @@ def get_format(path: str) -> str: index = path.commonprefix(snakemake.input.idx).rstrip(".") - with tempfile.TemporaryDirectory() as tmpdir: shell( "( bowtie2" @@ -180,4 +176,4 @@ def get_format(path: str) -> str: " -x {index}" " {extra}" " " + pipe_cmd + ") {log}" - ) \ No newline at end of file + ) From c653f326d700d45960ad78fb7267e03c699df788 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Tue, 3 Dec 2024 18:35:59 +0100 Subject: [PATCH 03/33] fix: snakefmt and lint --- bio/bowtie2/align/test/Snakefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bio/bowtie2/align/test/Snakefile b/bio/bowtie2/align/test/Snakefile index 3555d6cd43b..b6f4d31e7a8 100644 --- a/bio/bowtie2/align/test/Snakefile +++ b/bio/bowtie2/align/test/Snakefile @@ -108,10 +108,11 @@ for order in ["coordinate", "queryname"]: for program in ["none", "samtools", "picard"]: rule: - name: f"bowtie2_align_{program}_{order}_{extension}" + name: + f"bowtie2_align_{program}_{order}_{extension}" input: - sample = ["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], - idx = multiext( + sample=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], + idx=multiext( "index/genome", ".1.bt2", ".2.bt2", From 9d58e6111a50c344580c9bcd258436b3dfc8121e Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Tue, 3 Dec 2024 18:36:12 +0100 Subject: [PATCH 04/33] fix: sort environment --- bio/bowtie2/align/environment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bio/bowtie2/align/environment.yaml b/bio/bowtie2/align/environment.yaml index b08cb1424d6..7dc5dc02174 100644 --- a/bio/bowtie2/align/environment.yaml +++ b/bio/bowtie2/align/environment.yaml @@ -4,6 +4,6 @@ channels: - nodefaults dependencies: - bowtie2 =2.5.4 - - samtools =1.21 - picard-slim =3.3.0 + - samtools =1.21 - snakemake-wrapper-utils =0.6.2 From b83e343a8df89748eec8e2df60abae99d1147b01 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Tue, 3 Dec 2024 18:44:33 +0100 Subject: [PATCH 05/33] fix: remove duplicated params --- bio/bowtie2/align/test/Snakefile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bio/bowtie2/align/test/Snakefile b/bio/bowtie2/align/test/Snakefile index b6f4d31e7a8..366ef2b5263 100644 --- a/bio/bowtie2/align/test/Snakefile +++ b/bio/bowtie2/align/test/Snakefile @@ -134,11 +134,9 @@ for order in ["coordinate", "queryname"]: sort_program=program, sort_order=order, sort_extra="", - extra="", + extra="", # optional parameters log: f"logs/bowtie2/{{sample}}.{program}_{order}_{extension}.log", - params: - extra="", # optional parameters threads: 8 # Use at least two threads wrapper: "master/bio/bowtie2/align" From e116b1071474519b26039fac4254d6e136ce436c Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Tue, 3 Dec 2024 18:45:53 +0100 Subject: [PATCH 06/33] fix: it is sort_program --- bio/bowtie2/align/wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index 88571ae4735..5ea124ae706 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -100,7 +100,7 @@ ) else: - raise ValueError(f"Unexpected value for params.sort ({sort})") + raise ValueError(f"Unexpected value for params.sort ({sort_program})") def get_format(path: str) -> str: From d48cf0fbf58df9048ac209a732a200861fe15879 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Tue, 3 Dec 2024 18:46:37 +0100 Subject: [PATCH 07/33] fix: remove note on sort_program --- bio/bowtie2/align/meta.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/bio/bowtie2/align/meta.yaml b/bio/bowtie2/align/meta.yaml index 24f5751ff4c..cd3f968191d 100644 --- a/bio/bowtie2/align/meta.yaml +++ b/bio/bowtie2/align/meta.yaml @@ -26,5 +26,4 @@ params: - sort_order: choose between `coordinate`(default) or `queryname`. notes: | * The `extra` param allows for additional arguments for bowtie2. - * The `sort` param allows to enable sorting, and can be either 'none', 'samtools' or 'picard'. * This wrapper uses an inner pipe. Make sure to use at least two threads in your Snakefile. From be1b1ece776406cf14e342195baf2063aad086c7 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:00:17 +0100 Subject: [PATCH 08/33] chore: add name --- bio/bowtie2/align/wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index 5ea124ae706..b627f9bb572 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -1,4 +1,4 @@ -__author__ = "Johannes Köster" +__author__ = "Johannes Köster, Jorge Langa" __copyright__ = "Copyright 2016, Johannes Köster" __email__ = "koester@jimmy.harvard.edu" __license__ = "MIT" From c784d9477117fd101f403cd7d4bbf297ad01174c Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:00:37 +0100 Subject: [PATCH 09/33] chore: add name --- bio/bowtie2/align/meta.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/bio/bowtie2/align/meta.yaml b/bio/bowtie2/align/meta.yaml index cd3f968191d..d4b3dff00e3 100644 --- a/bio/bowtie2/align/meta.yaml +++ b/bio/bowtie2/align/meta.yaml @@ -5,6 +5,7 @@ authors: - Johannes Köster - Filipe G. Vieira - Thibault Dayris + - Jorge Langa input: - sample: FASTQ file(s) - idx: Bowtie2 indexed reference index From 734b1bb5d6e7a16cf76f339751b7e9fb520aaa66 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:41:11 +0100 Subject: [PATCH 10/33] fix: remove patch from version numbers --- bio/bowtie2/align/environment.linux-64.pin.txt | 8 ++++++-- bio/bowtie2/align/environment.yaml | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/bio/bowtie2/align/environment.linux-64.pin.txt b/bio/bowtie2/align/environment.linux-64.pin.txt index fc5ce3164ab..26858905546 100644 --- a/bio/bowtie2/align/environment.linux-64.pin.txt +++ b/bio/bowtie2/align/environment.linux-64.pin.txt @@ -21,6 +21,7 @@ https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.13-hb9d3cd8_0.conda https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.3-hb9d3cd8_1.conda#ee228789a85f961d14567252a03e725f https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.4-h5888daf_0.conda#db833e03127376d461e1e13e76f09b6c https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_1.conda#e39480b9ca41323497b05492a63bc35b +https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.6.3-hb9d3cd8_1.conda#2ecf2f1c7e4e21fcfe6423a51a992d84 https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-hc0a3c3a_1.conda#234a5554c53625688d51062645337328 https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8 https://conda.anaconda.org/conda-forge/linux-64/openssl-3.4.0-hb9d3cd8_0.conda#23cc74f77eb99315c0360ec3533147a9 @@ -37,6 +38,7 @@ https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172b https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 +https://conda.anaconda.org/conda-forge/linux-64/liblzma-devel-5.6.3-hb9d3cd8_1.conda#cc4687e1814ed459f3bd6d8e05251ab2 https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-h4bc722e_0.conda#aeb98fdeb2e8f25d43ef71fbacbeec80 https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.44-hadc24fc_0.conda#f4cc49d7aa68316213e4b12be35308d1 https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.47.0-hadc24fc_1.conda#b6f02b52a174e612e89548f4663ce56a @@ -48,7 +50,8 @@ https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda#9 https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-he02047a_1.conda#70caf8bb6cf39a0b6b7efc885f51c0fe https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc -https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 +https://conda.anaconda.org/conda-forge/linux-64/xz-gpl-tools-5.6.3-hbcc6ac9_1.conda#f529917bab7862aaad6867bf2ea47a99 +https://conda.anaconda.org/conda-forge/linux-64/xz-tools-5.6.3-hb9d3cd8_1.conda#de3f31a6eed01bc2b8c7dcad07ad9034 https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-hb9d3cd8_2.conda#c9f075ab2f33b3bbee9e62d4ad0a6cd8 https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c @@ -61,7 +64,8 @@ https://conda.anaconda.org/conda-forge/linux-64/perl-5.32.1-7_hd590300_perl5.con https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123 https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-he73a12e_1.conda#05a8ea5f446de33006171a7afe6ae857 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.10-h4f16b4b_0.conda#0b666058a179b744a622d0a4a0c56353 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.10-h4f16b4b_1.conda#125f34a17d7b4bea418a83904ea82ea6 +https://conda.anaconda.org/conda-forge/linux-64/xz-5.6.3-hbcc6ac9_1.conda#62aae173382a8aae284726353c6a6a24 https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45 https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.15.0-h7e30c49_1.conda#8f5b0b297b59e1ac160ad4beec99dbee https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368 diff --git a/bio/bowtie2/align/environment.yaml b/bio/bowtie2/align/environment.yaml index 7dc5dc02174..5bd6909f70f 100644 --- a/bio/bowtie2/align/environment.yaml +++ b/bio/bowtie2/align/environment.yaml @@ -3,7 +3,7 @@ channels: - bioconda - nodefaults dependencies: - - bowtie2 =2.5.4 - - picard-slim =3.3.0 + - bowtie2 =2.5 + - picard-slim =3.3 - samtools =1.21 - - snakemake-wrapper-utils =0.6.2 + - snakemake-wrapper-utils =0.6 From eac0db4482aadff36677f67f5b159054782d59e5 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:51:47 +0100 Subject: [PATCH 11/33] fix: Update bio/bowtie2/align/wrapper.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- bio/bowtie2/align/wrapper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index b627f9bb572..22b65df29b9 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -26,6 +26,8 @@ java_opts = get_java_opts(snakemake) bowtie2_threads = snakemake.threads +if bowtie2_threads < 2: + raise ValueError("This wrapper requires at least 2 threads") samtools_threads = snakemake.threads - 1 From d2646f47740e711398312936db95f9bf15ff90d7 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Fri, 6 Dec 2024 09:55:27 +0100 Subject: [PATCH 12/33] refactor: rearrange everything, make pylint yell less --- bio/bowtie2/align/wrapper.py | 207 +++++++++++++++++++---------------- 1 file changed, 112 insertions(+), 95 deletions(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index b627f9bb572..d2543fa76d6 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -1,36 +1,28 @@ __author__ = "Johannes Köster, Jorge Langa" -__copyright__ = "Copyright 2016, Johannes Köster" +__copyright__ = "Copyright 2024, Johannes Köster, Jorge Langa" __email__ = "koester@jimmy.harvard.edu" __license__ = "MIT" import tempfile from os import path + from snakemake.shell import shell from snakemake_wrapper_utils.java import get_java_opts from snakemake_wrapper_utils.samtools import get_samtools_opts -# All idx required by bowtie2 -REQUIRED_IDX = {".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2", ".rev.1.bt2", ".rev.2.bt2"} +# input.sample +SAMPLE = snakemake.input.sample -# Extract arguments. -extra = snakemake.params.get("extra", "") -log = snakemake.log_fmt_shell(stdout=False, stderr=True) -sort_program = snakemake.params.get("sort_program", "none") -sort_order = snakemake.params.get("sort_order", "coordinate") -sort_extra = snakemake.params.get("sort_extra", "") -samtools_opts = get_samtools_opts( - snakemake, parse_threads=False, param_name="sort_extra" -) -java_opts = get_java_opts(snakemake) +N_SAMPLE = len(SAMPLE) -bowtie2_threads = snakemake.threads -samtools_threads = snakemake.threads - 1 +if not isinstance(SAMPLE, str) and N_SAMPLE not in {1, 2, }: + raise ValueError("input must have 1 (single-end) or 2 (paired-end) elements") +# input.idx +REQUIRED_IDX = {".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2", ".rev.1.bt2", ".rev.2.bt2"} -# Extract index from input -# and check that all required indices are declared index = path.commonprefix(snakemake.input.idx)[:-1] if len(index) == 0: @@ -45,122 +37,147 @@ ) -# Check inputs/arguments. -if not isinstance(snakemake.input.sample, str) and len(snakemake.input.sample) not in { - 1, - 2, -}: - raise ValueError("input must have 1 (single-end) or 2 (paired-end) elements") +# input.ref +REF = snakemake.input.get("ref", None) -if sort_order not in {"coordinate", "queryname"}: - raise ValueError(f"Unexpected value for sort_order ({sort_order})") +# input.ref_fai +REF_FAI = snakemake.input.get("ref_fai", None) -# Determine which pipe command to use for converting to bam or sorting. -if sort_program == "none": - # Correctly assign number of threads according to user request - if samtools_threads >= 1: - samtools_opts += f" --threads {samtools_threads} " - - if str(snakemake.output[0]).lower().endswith(("bam", "cram")): - # Simply convert to bam using samtools view. - pipe_cmd = " | samtools view {samtools_opts} > {snakemake.output[0]}" - else: - # Do not perform any sort nor compression, output raw sam - pipe_cmd = " > {snakemake.output[0]} " +# output +BAM = str(snakemake.output[0]) +metrics = snakemake.output.get("metrics", None) +unaligned = snakemake.output.get("unaligned", None) +unpaired = snakemake.output.get("unpaired", None) +unconcordant = snakemake.output.get("unconcordant", None) +concordant = snakemake.output.get("concordant", None) -elif sort_program == "samtools": - # Correctly assign number of threads according to user request - if samtools_threads >= 1: - samtools_opts += f" --threads {samtools_threads} " +# log +LOG = snakemake.log_fmt_shell(stdout=False, stderr=True) + +# threads +bowtie2_threads = snakemake.threads +sort_threads = snakemake.threads - 1 - # Add name flag if needed. - if sort_order == "queryname": - sort_extra += " -n" - # Sort alignments using samtools sort. - pipe_cmd = " | samtools sort {samtools_opts} {sort_extra} -T {tmpdir} > {snakemake.output[0]}" -elif sort_program == "picard": - # Correctly assign number of threads according to user request - bowtie2_threads = bowtie2_threads - 1 - if bowtie2_threads <= 0: - raise ValueError( - "Not enough threads requested. This wrapper requires exactly one more." - ) - # Sort alignments using picard SortSam. - pipe_cmd = ( - " | picard SortSam {java_opts} {sort_extra} " - "--INPUT /dev/stdin " - "--TMP_DIR {tmpdir} " - "--SORT_ORDER {sort_order} " - "--OUTPUT {snakemake.output[0]}" +# params +extra = snakemake.params.get("extra", "") +interleaved = snakemake.params.get("interleaved", False) +sort_program = snakemake.params.get("sort_program", "none") +sort_order = snakemake.params.get("sort_order", "coordinate") +sort_extra = snakemake.params.get("sort_extra", "") +samtools_opts = get_samtools_opts( + snakemake, parse_threads=False, param_name="sort_extra" +) +java_opts = get_java_opts(snakemake) + +if not isinstance(interleaved, bool): + raise ValueError("params.interleaved must be a boolean") + +if sort_order not in {"coordinate", "queryname"}: + raise ValueError( + f"Unexpected value for sort_order ({sort_order})" + "Valid values are 'coordinate' or 'queryname'" ) -else: - raise ValueError(f"Unexpected value for params.sort ({sort_program})") +if sort_program not in {"none", "samtools", "picard"}: + raise ValueError( + f"Unexpected value for sort_program ({sort_program})" + "Valid values are 'none', 'samtools' or 'picard'" + ) + + +# shell -def get_format(path: str) -> str: +# shell.sample +def get_extension(filename: str) -> str: """ Return file format since Bowtie2 reads files that could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2). """ - if path.endswith((".gz", ".bz2")): - return path.split(".")[-2].lower() - return path.split(".")[-1].lower() + if filename.endswith((".gz", ".bz2")): + return filename.split(".")[-2].lower() + return filename.split(".")[-1].lower() -n = len(snakemake.input.sample) -assert ( - n == 1 or n == 2 -), "input->sample must have 1 (single-end) or 2 (paired-end) elements." - - -sample = "" -if n == 1: - if get_format(snakemake.input.sample[0]) in ("bam", "sam"): - sample = f"-b {snakemake.input.sample}" +CMD_INPUT = "" +if N_SAMPLE == 1: + if get_extension(SAMPLE[0]) in ("bam", "sam"): + CMD_INPUT = f"-b {SAMPLE}" else: - if snakemake.params.get("interleaved", False): - sample = f"--interleaved {snakemake.input.sample}" + if interleaved: + CMD_INPUT = f"--interleaved {SAMPLE}" else: - sample = f"-U {snakemake.input.sample}" + CMD_INPUT = f"-U {SAMPLE}" else: - sample = "-1 {} -2 {}".format(*snakemake.input.sample) + CMD_INPUT = f"-1 {SAMPLE[0]} -2 {SAMPLE[1]}" -if all(get_format(sample) in ("fastq", "fq") for sample in snakemake.input.sample): +if all(get_extension(sample) in ("fastq", "fq") for sample in SAMPLE): extra += " -q " -elif all(get_format(sample) == "tab5" for sample in snakemake.input.sample): +elif all(get_extension(sample) == "tab5" for sample in SAMPLE): extra += " --tab5 " -elif all(get_format(sample) == "tab6" for sample in snakemake.input.sample): +elif all(get_extension(sample) == "tab6" for sample in SAMPLE): extra += " --tab6 " elif all( - get_format(sample) in ("fa", "mfa", "fasta") for sample in snakemake.input.sample + get_extension(sample) in ("fa", "mfa", "fasta") for sample in SAMPLE ): extra += " -f " -metrics = snakemake.output.get("metrics") +# shell.threads +if sort_program != "none" and bowtie2_threads <= 1: + raise ValueError( + "Not enough threads requested. This wrapper requires at least two threads: " + "one for bowtie2 and one for samtools/picard." + ) + + +# shell.sort + +# Determine which pipe command to use for converting to bam or sorting. +match sort_program: + case "none": + # Correctly assign number of threads according to user request + if sort_threads >= 1: + samtools_opts += f" --threads {sort_threads} " + if BAM.lower().endswith(("bam", "cram")): + # Simply convert to bam using samtools view. + PIPE_CMD = f" | samtools view {samtools_opts} > {BAM} " + else: + # Do not perform any sort nor compression, output raw sam + PIPE_CMD = " > {BAM} " + case "samtools": + # Correctly assign number of threads according to user request + if sort_threads >= 1: + samtools_opts += f" --threads {sort_threads} " + # Add name flag if needed. + if sort_order == "queryname": + sort_extra += " -n" + # Sort alignments using samtools sort. + PIPE_CMD = " | samtools sort {samtools_opts} {sort_extra} -T {tmpdir} > {BAM}" + case "picard": + PIPE_CMD = ( + " | picard SortSam {java_opts} {sort_extra} " + "--INPUT /dev/stdin " + "--TMP_DIR {tmpdir} " + "--SORT_ORDER {sort_order} " + "--OUTPUT {BAM} " + ) + + if metrics: extra += f" --met-file {metrics} " - -unaligned = snakemake.output.get("unaligned") if unaligned: extra += f" --un {unaligned} " - -unpaired = snakemake.output.get("unpaired") if unpaired: extra += f" --al {unpaired} " - -unconcordant = snakemake.output.get("unconcordant") if unconcordant: extra += f" --un-conc {unconcordant} " - -concordant = snakemake.output.get("concordant") if concordant: extra += f" --al-conc {concordant} " @@ -172,8 +189,8 @@ def get_format(path: str) -> str: shell( "( bowtie2" " --threads {bowtie2_threads}" - " {sample} " + " {CMD_INPUT} " " -x {index}" " {extra}" - " " + pipe_cmd + ") {log}" + " " + PIPE_CMD + ") {LOG}" ) From c4f9cf39d63e31620c17c2fe99b204ada88dc6c4 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Fri, 6 Dec 2024 10:31:53 +0100 Subject: [PATCH 13/33] fix: add fai file --- bio/bowtie2/align/test/genome.fasta.fai | 1 + 1 file changed, 1 insertion(+) create mode 100644 bio/bowtie2/align/test/genome.fasta.fai diff --git a/bio/bowtie2/align/test/genome.fasta.fai b/bio/bowtie2/align/test/genome.fasta.fai new file mode 100644 index 00000000000..f3cdedb5518 --- /dev/null +++ b/bio/bowtie2/align/test/genome.fasta.fai @@ -0,0 +1 @@ +Sheila 20 8 20 21 From badae1c7551fd889fef8da7dc26f3c2dc9dc9ff1 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:20:09 +0100 Subject: [PATCH 14/33] fix: typo in ref_fai --- bio/bowtie2/align/test/Snakefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bio/bowtie2/align/test/Snakefile b/bio/bowtie2/align/test/Snakefile index 366ef2b5263..6643e14b48b 100644 --- a/bio/bowtie2/align/test/Snakefile +++ b/bio/bowtie2/align/test/Snakefile @@ -86,6 +86,7 @@ rule test_bowtie2_cram: ".rev.2.bt2", ), ref="genome.fasta", + ref_fai="genome.fasta.fai", output: "mapped_idx/{sample}.cram", # idx="", @@ -122,6 +123,7 @@ for order in ["coordinate", "queryname"]: ".rev.2.bt2", ), ref="genome.fasta", + ref_fai="genome.fasta.fai", output: f"mapped_idx/{{sample}}.{program}_{order}.{extension}", # idx="", From 36b61450207ce43582caedd87f11a8284c92a8ff Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:22:36 +0100 Subject: [PATCH 15/33] chore: black --- bio/bowtie2/align/wrapper.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index b9b385662ec..1267f01a124 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -11,6 +11,7 @@ from snakemake_wrapper_utils.java import get_java_opts from snakemake_wrapper_utils.samtools import get_samtools_opts + # helpers def get_extension(filename: str) -> str: """ @@ -27,7 +28,10 @@ def get_extension(filename: str) -> str: N_SAMPLE = len(SAMPLE) -if not isinstance(SAMPLE, str) and N_SAMPLE not in {1, 2, }: +if not isinstance(SAMPLE, str) and N_SAMPLE not in { + 1, + 2, +}: raise ValueError("input must have 1 (single-end) or 2 (paired-end) elements") # input.idx @@ -73,8 +77,6 @@ def get_extension(filename: str) -> str: sort_threads = snakemake.threads - 1 - - # params extra = snakemake.params.get("extra", "") interleaved = snakemake.params.get("interleaved", False) @@ -102,18 +104,18 @@ def get_extension(filename: str) -> str: ) - # shell # shell.sample -if bam_extension == "cram": - if REF is None or REF_FAI is None: - raise ValueError( - "Reference file and index are required for CRAM output." - "Please specify them as input.ref and input.ref_fai." - ) +if bam_extension == "cram" and (REF is None or REF_FAI is None): + raise ValueError( + "Reference file and index are required for CRAM output." + "Please specify them as input.ref and input.ref_fai\n" + f"input.ref: {REF}\n" + f"input.ref_fai: {REF_FAI}" + ) CMD_INPUT = "" @@ -135,9 +137,7 @@ def get_extension(filename: str) -> str: extra += " --tab5 " elif all(get_extension(sample) == "tab6" for sample in SAMPLE): extra += " --tab6 " -elif all( - get_extension(sample) in ("fa", "mfa", "fasta") for sample in SAMPLE -): +elif all(get_extension(sample) in ("fa", "mfa", "fasta") for sample in SAMPLE): extra += " -f " @@ -171,8 +171,12 @@ def get_extension(filename: str) -> str: if sort_order == "queryname": sort_extra += " -n" # Sort alignments using samtools sort. + if bam_extension == "cram": + samtools_opts += f" --reference {REF} " PIPE_CMD = " | samtools sort {samtools_opts} {sort_extra} -T {tmpdir} > {BAM}" case "picard": + if bam_extension == "cram": + picard_opts = f" REFERENCE_SEQUENCE={REF} " PIPE_CMD = ( " | picard SortSam {java_opts} {sort_extra} " "--INPUT /dev/stdin " @@ -196,7 +200,6 @@ def get_extension(filename: str) -> str: index = path.commonprefix(snakemake.input.idx).rstrip(".") - with tempfile.TemporaryDirectory() as tmpdir: shell( "( bowtie2" From d97f32a8c13e6b143469dafd1b4c3ae1c434f886 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:27:38 +0100 Subject: [PATCH 16/33] chore: remove unused stuff --- bio/bowtie2/align/test/Snakefile | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/bio/bowtie2/align/test/Snakefile b/bio/bowtie2/align/test/Snakefile index 6643e14b48b..2e2cc9b609b 100644 --- a/bio/bowtie2/align/test/Snakefile +++ b/bio/bowtie2/align/test/Snakefile @@ -86,7 +86,6 @@ rule test_bowtie2_cram: ".rev.2.bt2", ), ref="genome.fasta", - ref_fai="genome.fasta.fai", output: "mapped_idx/{sample}.cram", # idx="", @@ -123,20 +122,13 @@ for order in ["coordinate", "queryname"]: ".rev.2.bt2", ), ref="genome.fasta", - ref_fai="genome.fasta.fai", output: f"mapped_idx/{{sample}}.{program}_{order}.{extension}", - # idx="", - # metrics="", - # unaligned="", - # unpaired="", - # unconcordant="", - # concordant="", params: sort_program=program, sort_order=order, sort_extra="", - extra="", # optional parameters + extra="", log: f"logs/bowtie2/{{sample}}.{program}_{order}_{extension}.log", threads: 8 # Use at least two threads From ceba80fe131fa7db1d426c4bb66ccf8475546cba Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:32:01 +0100 Subject: [PATCH 17/33] fix: i'd swear that I had put the fai files before --- bio/bowtie2/align/test/Snakefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bio/bowtie2/align/test/Snakefile b/bio/bowtie2/align/test/Snakefile index 2e2cc9b609b..0b210a2e8cf 100644 --- a/bio/bowtie2/align/test/Snakefile +++ b/bio/bowtie2/align/test/Snakefile @@ -86,6 +86,7 @@ rule test_bowtie2_cram: ".rev.2.bt2", ), ref="genome.fasta", + ref_fai="genome.fasta.fai", output: "mapped_idx/{sample}.cram", # idx="", @@ -122,6 +123,7 @@ for order in ["coordinate", "queryname"]: ".rev.2.bt2", ), ref="genome.fasta", + ref_fai="genome.fasta.fai", output: f"mapped_idx/{{sample}}.{program}_{order}.{extension}", params: From ecdab946f646fdb289d9565b797afd2fb6e879af Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:35:41 +0100 Subject: [PATCH 18/33] chore: typo --- bio/bowtie2/align/wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index 1267f01a124..3d93e758f07 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -46,7 +46,7 @@ def get_extension(filename: str) -> str: missing_idx = REQUIRED_IDX - set(index_extensions) if len(missing_idx) > 0: raise ValueError( - f"Missing required indices: {missing_idx} declarad as input.idx.\n" + f"Missing required indices: {missing_idx} declared as input.idx.\n" f"Identified reference file is {index} with extensions {index_extensions}" ) From a6b511a3cbf894138c80ec3daa20835c6aaafc91 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Thu, 12 Dec 2024 17:41:49 +0100 Subject: [PATCH 19/33] refactor: uppercase constants and rearrange --- bio/bowtie2/align/wrapper.py | 51 +++++++++++++++++------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index 3d93e758f07..4d08b5b2535 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -23,18 +23,19 @@ def get_extension(filename: str) -> str: return filename.split(".")[-1].lower() -# input.sample +# input SAMPLE = snakemake.input.sample +INDEX = snakemake.input.idx +REF = snakemake.input.get("ref", None) +REF_FAI = snakemake.input.get("ref_fai", None) -N_SAMPLE = len(SAMPLE) -if not isinstance(SAMPLE, str) and N_SAMPLE not in { - 1, - 2, -}: - raise ValueError("input must have 1 (single-end) or 2 (paired-end) elements") # input.idx + +if not isinstance(SAMPLE, str) and len(SAMPLE) not in [1, 2]: + raise ValueError("input must have 1 (single-end) or 2 (paired-end) elements") + REQUIRED_IDX = {".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2", ".rev.1.bt2", ".rev.2.bt2"} index = path.commonprefix(snakemake.input.idx)[:-1] @@ -51,20 +52,16 @@ def get_extension(filename: str) -> str: ) -# input.ref -REF = snakemake.input.get("ref", None) -# input.ref_fai -REF_FAI = snakemake.input.get("ref_fai", None) # output BAM = str(snakemake.output[0]) -metrics = snakemake.output.get("metrics", None) -unaligned = snakemake.output.get("unaligned", None) -unpaired = snakemake.output.get("unpaired", None) -unconcordant = snakemake.output.get("unconcordant", None) -concordant = snakemake.output.get("concordant", None) +METRICS = snakemake.output.get("metrics", None) +UNALIGNED = snakemake.output.get("unaligned", None) +UNPAIRED = snakemake.output.get("unpaired", None) +UNCONCORDANT = snakemake.output.get("unconcordant", None) +CONCORDANT = snakemake.output.get("concordant", None) bam_extension = get_extension(BAM) @@ -119,7 +116,7 @@ def get_extension(filename: str) -> str: CMD_INPUT = "" -if N_SAMPLE == 1: +if len(SAMPLE) == 1: if get_extension(SAMPLE[0]) in ("bam", "sam"): CMD_INPUT = f"-b {SAMPLE}" else: @@ -186,16 +183,16 @@ def get_extension(filename: str) -> str: ) -if metrics: - extra += f" --met-file {metrics} " -if unaligned: - extra += f" --un {unaligned} " -if unpaired: - extra += f" --al {unpaired} " -if unconcordant: - extra += f" --un-conc {unconcordant} " -if concordant: - extra += f" --al-conc {concordant} " +if METRICS: + extra += f" --met-file {METRICS} " +if UNALIGNED: + extra += f" --un {UNALIGNED} " +if UNPAIRED: + extra += f" --al {UNPAIRED} " +if UNCONCORDANT: + extra += f" --un-conc {UNCONCORDANT} " +if CONCORDANT: + extra += f" --al-conc {CONCORDANT} " index = path.commonprefix(snakemake.input.idx).rstrip(".") From 056cd060e7ebd0c6e13601c67dee2cf6264ad27f Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Thu, 19 Dec 2024 17:08:27 +0100 Subject: [PATCH 20/33] refactor: distinguish original variables from the ones generated for the commands --- bio/bowtie2/align/wrapper.py | 132 +++++++++++++++++------------------ 1 file changed, 65 insertions(+), 67 deletions(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index 4d08b5b2535..435df1d2473 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -29,10 +29,40 @@ def get_extension(filename: str) -> str: REF = snakemake.input.get("ref", None) REF_FAI = snakemake.input.get("ref_fai", None) +# output +BAM = str(snakemake.output[0]) +METRICS = snakemake.output.get("metrics", None) +UNALIGNED = snakemake.output.get("unaligned", None) +UNPAIRED = snakemake.output.get("unpaired", None) +UNCONCORDANT = snakemake.output.get("unconcordant", None) +CONCORDANT = snakemake.output.get("concordant", None) + + + +# log +LOG = snakemake.log_fmt_shell(stdout=False, stderr=True) + +# threads +THREADS = snakemake.threads +sort_threads = snakemake.threads - 1 + + +# params +EXTRA = snakemake.params.get("extra", "") +IS_INTERLEAVED = snakemake.params.get("interleaved", False) +SORT_PROGRAM = snakemake.params.get("sort_program", "none") +SORT_ORDER = snakemake.params.get("sort_order", "coordinate") +SORT_EXTRA = snakemake.params.get("sort_extra", "") +SAMTOOLS_OPTS = get_samtools_opts( + snakemake, parse_threads=False, param_name="sort_extra" +) +JAVA_OPTS = get_java_opts(snakemake) + -# input.idx +# check +# check inputs if not isinstance(SAMPLE, str) and len(SAMPLE) not in [1, 2]: raise ValueError("input must have 1 (single-end) or 2 (paired-end) elements") @@ -51,58 +81,26 @@ def get_extension(filename: str) -> str: f"Identified reference file is {index} with extensions {index_extensions}" ) - - - - -# output -BAM = str(snakemake.output[0]) -METRICS = snakemake.output.get("metrics", None) -UNALIGNED = snakemake.output.get("unaligned", None) -UNPAIRED = snakemake.output.get("unpaired", None) -UNCONCORDANT = snakemake.output.get("unconcordant", None) -CONCORDANT = snakemake.output.get("concordant", None) - +# check outputs bam_extension = get_extension(BAM) - -# log -LOG = snakemake.log_fmt_shell(stdout=False, stderr=True) - -# threads -bowtie2_threads = snakemake.threads -sort_threads = snakemake.threads - 1 - - -# params -extra = snakemake.params.get("extra", "") -interleaved = snakemake.params.get("interleaved", False) -sort_program = snakemake.params.get("sort_program", "none") -sort_order = snakemake.params.get("sort_order", "coordinate") -sort_extra = snakemake.params.get("sort_extra", "") -samtools_opts = get_samtools_opts( - snakemake, parse_threads=False, param_name="sort_extra" -) -java_opts = get_java_opts(snakemake) - -if not isinstance(interleaved, bool): +# check params +if not isinstance(IS_INTERLEAVED, bool): raise ValueError("params.interleaved must be a boolean") -if sort_order not in {"coordinate", "queryname"}: +if SORT_ORDER not in {"coordinate", "queryname"}: raise ValueError( - f"Unexpected value for sort_order ({sort_order})" + f"Unexpected value for sort_order ({SORT_ORDER})" "Valid values are 'coordinate' or 'queryname'" ) -if sort_program not in {"none", "samtools", "picard"}: +if SORT_PROGRAM not in {"none", "samtools", "picard"}: raise ValueError( - f"Unexpected value for sort_program ({sort_program})" + f"Unexpected value for sort_program ({SORT_PROGRAM})" "Valid values are 'none', 'samtools' or 'picard'" ) -# shell - # shell.sample @@ -120,26 +118,26 @@ def get_extension(filename: str) -> str: if get_extension(SAMPLE[0]) in ("bam", "sam"): CMD_INPUT = f"-b {SAMPLE}" else: - if interleaved: + if IS_INTERLEAVED: CMD_INPUT = f"--interleaved {SAMPLE}" else: CMD_INPUT = f"-U {SAMPLE}" else: CMD_INPUT = f"-1 {SAMPLE[0]} -2 {SAMPLE[1]}" - +cmd_extra = EXTRA if all(get_extension(sample) in ("fastq", "fq") for sample in SAMPLE): - extra += " -q " + cmd_extra += " -q " elif all(get_extension(sample) == "tab5" for sample in SAMPLE): - extra += " --tab5 " + cmd_extra += " --tab5 " elif all(get_extension(sample) == "tab6" for sample in SAMPLE): - extra += " --tab6 " + cmd_extra += " --tab6 " elif all(get_extension(sample) in ("fa", "mfa", "fasta") for sample in SAMPLE): - extra += " -f " + cmd_extra += " -f " # shell.threads -if sort_program != "none" and bowtie2_threads <= 1: +if SORT_PROGRAM != "none" and THREADS <= 1: raise ValueError( "Not enough threads requested. This wrapper requires at least two threads: " "one for bowtie2 and one for samtools/picard." @@ -149,60 +147,60 @@ def get_extension(filename: str) -> str: # shell.sort # Determine which pipe command to use for converting to bam or sorting. -match sort_program: +match SORT_PROGRAM: case "none": # Correctly assign number of threads according to user request if sort_threads >= 1: - samtools_opts += f" --threads {sort_threads} " + SAMTOOLS_OPTS += f" --threads {sort_threads} " if BAM.lower().endswith(("bam", "cram")): # Simply convert to bam using samtools view. - PIPE_CMD = f" | samtools view {samtools_opts} > {BAM} " + PIPE_CMD = f" | samtools view {SAMTOOLS_OPTS} > {BAM} " else: # Do not perform any sort nor compression, output raw sam PIPE_CMD = " > {BAM} " case "samtools": # Correctly assign number of threads according to user request if sort_threads >= 1: - samtools_opts += f" --threads {sort_threads} " + SAMTOOLS_OPTS += f" --threads {sort_threads} " # Add name flag if needed. - if sort_order == "queryname": - sort_extra += " -n" + if SORT_ORDER == "queryname": + SORT_EXTRA += " -n" # Sort alignments using samtools sort. if bam_extension == "cram": - samtools_opts += f" --reference {REF} " - PIPE_CMD = " | samtools sort {samtools_opts} {sort_extra} -T {tmpdir} > {BAM}" + SAMTOOLS_OPTS += f" --reference {REF} " + PIPE_CMD = " | samtools sort {SAMTOOLS_OPTS} {SORT_EXTRA} -T {TMPDIR} > {BAM}" case "picard": if bam_extension == "cram": - picard_opts = f" REFERENCE_SEQUENCE={REF} " + PICARD_OPTS = f" REFERENCE_SEQUENCE={REF} " PIPE_CMD = ( - " | picard SortSam {java_opts} {sort_extra} " + " | picard SortSam {JAVA_OPTS} {SORT_EXTRA} " "--INPUT /dev/stdin " - "--TMP_DIR {tmpdir} " - "--SORT_ORDER {sort_order} " + "--TMP_DIR {TMPDIR} " + "--SORT_ORDER {SORT_ORDER} " "--OUTPUT {BAM} " ) if METRICS: - extra += f" --met-file {METRICS} " + cmd_extra += f" --met-file {METRICS} " if UNALIGNED: - extra += f" --un {UNALIGNED} " + cmd_extra += f" --un {UNALIGNED} " if UNPAIRED: - extra += f" --al {UNPAIRED} " + cmd_extra += f" --al {UNPAIRED} " if UNCONCORDANT: - extra += f" --un-conc {UNCONCORDANT} " + cmd_extra += f" --un-conc {UNCONCORDANT} " if CONCORDANT: - extra += f" --al-conc {CONCORDANT} " + cmd_extra += f" --al-conc {CONCORDANT} " index = path.commonprefix(snakemake.input.idx).rstrip(".") -with tempfile.TemporaryDirectory() as tmpdir: +with tempfile.TemporaryDirectory() as TMPDIR: shell( "( bowtie2" - " --threads {bowtie2_threads}" + " --threads {THREADS}" " {CMD_INPUT} " " -x {index}" - " {extra}" + " {cmd_extra}" " " + PIPE_CMD + ") {LOG}" ) From fb07443cc86f535acd1a8efe15de4e98dfaae58e Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Thu, 19 Dec 2024 20:07:45 +0100 Subject: [PATCH 21/33] refactor: rearrange everything, make the index creation work --- bio/bowtie2/align/wrapper.py | 143 ++++++++++++++++++++--------------- 1 file changed, 83 insertions(+), 60 deletions(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index 435df1d2473..7cc8b1696f1 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -36,7 +36,7 @@ def get_extension(filename: str) -> str: UNPAIRED = snakemake.output.get("unpaired", None) UNCONCORDANT = snakemake.output.get("unconcordant", None) CONCORDANT = snakemake.output.get("concordant", None) - +BAI = snakemake.output.get("idx", None) # log @@ -55,7 +55,7 @@ def get_extension(filename: str) -> str: SORT_EXTRA = snakemake.params.get("sort_extra", "") SAMTOOLS_OPTS = get_samtools_opts( snakemake, parse_threads=False, param_name="sort_extra" -) +) + " " JAVA_OPTS = get_java_opts(snakemake) @@ -68,17 +68,18 @@ def get_extension(filename: str) -> str: REQUIRED_IDX = {".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2", ".rev.1.bt2", ".rev.2.bt2"} -index = path.commonprefix(snakemake.input.idx)[:-1] +index_prefix = path.commonprefix(snakemake.input.idx).rstrip(".") + -if len(index) == 0: +if len(index_prefix) == 0: raise ValueError("Could not determine common prefix of inputs.idx files.") -index_extensions = [idx[len(index) :] for idx in snakemake.input.idx] +index_extensions = [idx[len(index_prefix) :] for idx in snakemake.input.idx] missing_idx = REQUIRED_IDX - set(index_extensions) if len(missing_idx) > 0: raise ValueError( f"Missing required indices: {missing_idx} declared as input.idx.\n" - f"Identified reference file is {index} with extensions {index_extensions}" + f"Identified reference file is {index_prefix} with extensions {index_extensions}" ) # check outputs @@ -100,10 +101,13 @@ def get_extension(filename: str) -> str: "Valid values are 'none', 'samtools' or 'picard'" ) +if SORT_PROGRAM != "none" and THREADS <= 1: + raise ValueError( + "Not enough threads requested. This wrapper requires at least two threads: " + "one for bowtie2 and one for samtools/picard." + ) -# shell.sample - - +# check input - output compatibility if bam_extension == "cram" and (REF is None or REF_FAI is None): raise ValueError( "Reference file and index are required for CRAM output." @@ -112,19 +116,33 @@ def get_extension(filename: str) -> str: f"input.ref_fai: {REF_FAI}" ) +if BAI is not None and SORT_PROGRAM == "none": + raise ValueError( + "Index file is requested but no sort program is specified." + "Please specify a sort program to generate the index file." + ) + + +# compose shell command -CMD_INPUT = "" +# input part +cmd_input = "" if len(SAMPLE) == 1: if get_extension(SAMPLE[0]) in ("bam", "sam"): - CMD_INPUT = f"-b {SAMPLE}" + cmd_input = f"-b {SAMPLE}" else: if IS_INTERLEAVED: - CMD_INPUT = f"--interleaved {SAMPLE}" + cmd_input = f"--interleaved {SAMPLE}" else: - CMD_INPUT = f"-U {SAMPLE}" + cmd_input = f"-U {SAMPLE}" else: - CMD_INPUT = f"-1 {SAMPLE[0]} -2 {SAMPLE[1]}" + cmd_input = f"-1 {SAMPLE[0]} -2 {SAMPLE[1]}" +cmd_index = index_prefix +cmd_threads = THREADS + + +# extra part cmd_extra = EXTRA if all(get_extension(sample) in ("fastq", "fq") for sample in SAMPLE): cmd_extra += " -q " @@ -135,72 +153,77 @@ def get_extension(filename: str) -> str: elif all(get_extension(sample) in ("fa", "mfa", "fasta") for sample in SAMPLE): cmd_extra += " -f " - -# shell.threads -if SORT_PROGRAM != "none" and THREADS <= 1: - raise ValueError( - "Not enough threads requested. This wrapper requires at least two threads: " - "one for bowtie2 and one for samtools/picard." - ) +if METRICS: + cmd_extra += f" --met-file {METRICS} " +if UNALIGNED: + cmd_extra += f" --un {UNALIGNED} " +if UNPAIRED: + cmd_extra += f" --al {UNPAIRED} " +if UNCONCORDANT: + cmd_extra += f" --un-conc {UNCONCORDANT} " +if CONCORDANT: + cmd_extra += f" --al-conc {CONCORDANT} " # shell.sort # Determine which pipe command to use for converting to bam or sorting. match SORT_PROGRAM: - case "none": - # Correctly assign number of threads according to user request - if sort_threads >= 1: - SAMTOOLS_OPTS += f" --threads {sort_threads} " - if BAM.lower().endswith(("bam", "cram")): - # Simply convert to bam using samtools view. - PIPE_CMD = f" | samtools view {SAMTOOLS_OPTS} > {BAM} " - else: - # Do not perform any sort nor compression, output raw sam - PIPE_CMD = " > {BAM} " case "samtools": # Correctly assign number of threads according to user request if sort_threads >= 1: - SAMTOOLS_OPTS += f" --threads {sort_threads} " - # Add name flag if needed. + SAMTOOLS_OPTS += f"--threads {sort_threads} " + if BAI: + bam = f"{BAM}##idx##{BAI}" + SAMTOOLS_OPTS += f"--write-index " + else: + bam = BAM if SORT_ORDER == "queryname": - SORT_EXTRA += " -n" - # Sort alignments using samtools sort. + SORT_EXTRA += "-n " if bam_extension == "cram": - SAMTOOLS_OPTS += f" --reference {REF} " - PIPE_CMD = " | samtools sort {SAMTOOLS_OPTS} {SORT_EXTRA} -T {TMPDIR} > {BAM}" + SAMTOOLS_OPTS += f"--reference {REF} --output-fmt CRAM " + cmd_output = ( + "| samtools sort " + "{SAMTOOLS_OPTS} " + "{SORT_EXTRA} " + "-T {TMPDIR} " + "-o {bam} " + ) + case "picard": + PICARD_OPTS = "" if bam_extension == "cram": - PICARD_OPTS = f" REFERENCE_SEQUENCE={REF} " - PIPE_CMD = ( - " | picard SortSam {JAVA_OPTS} {SORT_EXTRA} " + PICARD_OPTS += f"--REFERENCE_SEQUENCE {REF} " + if BAI: + PICARD_OPTS += f"--CREATE_INDEX true " + cmd_output = ( + "| picard SortSam {JAVA_OPTS} {SORT_EXTRA} " "--INPUT /dev/stdin " "--TMP_DIR {TMPDIR} " "--SORT_ORDER {SORT_ORDER} " "--OUTPUT {BAM} " ) + + case _: + # Correctly assign number of threads according to user request + if sort_threads >= 1: + SAMTOOLS_OPTS += f"--threads {sort_threads} " + if bam_extension == "bam": + cmd_output = f"| samtools view {SAMTOOLS_OPTS} --output {BAM}" + elif bam_extension == "cram": + cmd_output = f"| samtools view {SAMTOOLS_OPTS} --output {BAM} --output-fmt CRAM --reference {REF}" + else: + cmd_output = "> {BAM} " -if METRICS: - cmd_extra += f" --met-file {METRICS} " -if UNALIGNED: - cmd_extra += f" --un {UNALIGNED} " -if UNPAIRED: - cmd_extra += f" --al {UNPAIRED} " -if UNCONCORDANT: - cmd_extra += f" --un-conc {UNCONCORDANT} " -if CONCORDANT: - cmd_extra += f" --al-conc {CONCORDANT} " - - -index = path.commonprefix(snakemake.input.idx).rstrip(".") - +# let's rock! with tempfile.TemporaryDirectory() as TMPDIR: shell( - "( bowtie2" - " --threads {THREADS}" - " {CMD_INPUT} " - " -x {index}" - " {cmd_extra}" - " " + PIPE_CMD + ") {LOG}" + "( bowtie2 " + "--threads {THREADS} " + "{cmd_input} " + "-x {cmd_index} " + "{cmd_extra} " + + cmd_output + + " ) {LOG}" ) From 2ed25a1f289a7402bc8f1eb7ecb1f722eb775f49 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Thu, 19 Dec 2024 22:08:11 +0100 Subject: [PATCH 22/33] refactor: expand tests, comment extra output parts --- bio/bowtie2/align/meta.yaml | 10 +-- bio/bowtie2/align/test/Snakefile | 115 +++++++++---------------------- bio/bowtie2/align/wrapper.py | 52 +++++++------- test_wrappers.py | 31 ++++----- 4 files changed, 74 insertions(+), 134 deletions(-) diff --git a/bio/bowtie2/align/meta.yaml b/bio/bowtie2/align/meta.yaml index d4b3dff00e3..225c7a809da 100644 --- a/bio/bowtie2/align/meta.yaml +++ b/bio/bowtie2/align/meta.yaml @@ -14,11 +14,11 @@ input: output: - SAM/BAM/CRAM file. This must be the first output file in the output file list. - idx: Optional path to bam index. - - metrics: Optional path to metrics file. - - unaligned: Optional path to unaligned unpaired reads. - - unpaired: Optional path to unpaired reads that aligned at least once. - - unconcordant: Optional path to pairs that didn't align concordantly. - - concordant: Optional path to pairs that aligned concordantly at least once. + # - metrics: Optional path to metrics file. + # - unaligned: Optional path to unaligned unpaired reads. + # - unpaired: Optional path to unpaired reads that aligned at least once. + # - unconcordant: Optional path to pairs that didn't align concordantly. + # - concordant: Optional path to pairs that aligned concordantly at least once. params: - extra: additional program arguments (except for `-x`, `-U`, `-1`, `-2`, `--interleaved`, `-b`, `--met-file`, `--un`, `--al`, `--un-conc`, `--al-conc`, `-f`, `--tab6`, `--tab5`, `-q`, or `-p/--threads`) - interleaved: Input `sample` contains interleaved paired-end FASTQ/FASTA reads. `False`(default) or `True`. diff --git a/bio/bowtie2/align/test/Snakefile b/bio/bowtie2/align/test/Snakefile index 0b210a2e8cf..a1aa608f855 100644 --- a/bio/bowtie2/align/test/Snakefile +++ b/bio/bowtie2/align/test/Snakefile @@ -1,4 +1,4 @@ -rule test_bowtie2: +rule test_bowtie2_sam: input: sample=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], idx=multiext( @@ -10,25 +10,16 @@ rule test_bowtie2: ".rev.1.bt2", ".rev.2.bt2", ), - # ref="genome.fasta", #Required for CRAM output output: - "mapped/{sample}.bam", - # idx="", - # metrics="", - # unaligned="", - # unpaired="", - # unconcordant="", - # concordant="", + "mapped_sam/{sample}.sam", log: - "logs/bowtie2/{sample}.log", - params: - extra="", # optional parameters + "logs/bowtie2/mapped_sam_{sample}.log", threads: 8 # Use at least two threads wrapper: "master/bio/bowtie2/align" -use rule test_bowtie2 as test_bowtie2_se_gz with: +use rule test_bowtie2_sam as test_bowtie2_se_gz with: input: sample=["reads/{sample}.1.fastq.gz"], idx=multiext( @@ -42,38 +33,26 @@ use rule test_bowtie2 as test_bowtie2_se_gz with: ), output: "mapped_se_gz/{sample}.bam", + log: + "logs/bowtie2/mapped_se_gz_{sample}.log", -rule test_bowtie2_index: - input: - sample=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], - idx=multiext( - "index/genome", - ".1.bt2", - ".2.bt2", - ".3.bt2", - ".4.bt2", - ".rev.1.bt2", - ".rev.2.bt2", - ), +use rule test_bowtie2_sam as test_bowtie2_extra_outputs with: output: - "mapped_idx/{sample}.bam", - idx="mapped_idx/{sample}.bam.bai", - metrics="mapped_idx/{sample}.metrics.txt", - unaligned="mapped_idx/{sample}.unaligned.sam", - unpaired="mapped_idx/{sample}.unpaired.sam", - # unconcordant="", - # concordant="", + "mapped_multi/{sample}.bam", + idx="mapped_multi/{sample}.bam.bai", + # metrics="mapped_multi/{sample}.metrics.txt", + # unaligned=["mapped_multi/{sample}.unaligned.1.fq"], + # unpaired="mapped_multi/{sample}.unpaired.fq"], + # unconcordant="mapped_multi/{sample}.unconcordant.sam", + # concordant="mapped_multi/{sample}.concordant.sam", log: - "logs/bowtie2/{sample}.log", + "logs/bowtie2/mapped_multi_{sample}.log", params: - extra="", # optional parameters - threads: 8 # Use at least two threads - wrapper: - "master/bio/bowtie2/align" + sort_program="samtools", -rule test_bowtie2_cram: +use rule test_bowtie2_sam as test_bowtie2_cram with: input: sample=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], idx=multiext( @@ -88,51 +67,23 @@ rule test_bowtie2_cram: ref="genome.fasta", ref_fai="genome.fasta.fai", output: - "mapped_idx/{sample}.cram", - # idx="", - # metrics="", - # unaligned="", - # unpaired="", - # unconcordant="", - # concordant="", + "mapped_cram/{sample}.cram", log: - "logs/bowtie2/{sample}.log", - params: - extra="", # optional parameters - threads: 8 # Use at least two threads - wrapper: - "master/bio/bowtie2/align" + "logs/bowtie2/samtools_cram_{sample}.log", +use rule test_bowtie2_sam as test_bowtie2_sort_samtools with: + output: + "mapped_samtools/{sample}.bam", + log: + "logs/bowtie2/mapped_samtools_{sample}.log", + params: + sort_program="samtools" -for order in ["coordinate", "queryname"]: - for extension in ["sam", "bam", "cram"]: - for program in ["none", "samtools", "picard"]: - rule: - name: - f"bowtie2_align_{program}_{order}_{extension}" - input: - sample=["reads/{sample}.1.fastq", "reads/{sample}.2.fastq"], - idx=multiext( - "index/genome", - ".1.bt2", - ".2.bt2", - ".3.bt2", - ".4.bt2", - ".rev.1.bt2", - ".rev.2.bt2", - ), - ref="genome.fasta", - ref_fai="genome.fasta.fai", - output: - f"mapped_idx/{{sample}}.{program}_{order}.{extension}", - params: - sort_program=program, - sort_order=order, - sort_extra="", - extra="", - log: - f"logs/bowtie2/{{sample}}.{program}_{order}_{extension}.log", - threads: 8 # Use at least two threads - wrapper: - "master/bio/bowtie2/align" +use rule test_bowtie2_sam as test_bowtie2_sort_picard with: + output: + "mapped_picard/{sample}.bam", + log: + "logs/bowtie2/mapped_picard_{sample}.log", + params: + sort_program="picard" diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index 7cc8b1696f1..c88c087e108 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -31,11 +31,11 @@ def get_extension(filename: str) -> str: # output BAM = str(snakemake.output[0]) -METRICS = snakemake.output.get("metrics", None) -UNALIGNED = snakemake.output.get("unaligned", None) -UNPAIRED = snakemake.output.get("unpaired", None) -UNCONCORDANT = snakemake.output.get("unconcordant", None) -CONCORDANT = snakemake.output.get("concordant", None) +# METRICS = snakemake.output.get("metrics", None) +# UNALIGNED = snakemake.output.get("unaligned", None) +# UNPAIRED = snakemake.output.get("unpaired", None) +# UNCONCORDANT = snakemake.output.get("unconcordant", None) +# CONCORDANT = snakemake.output.get("concordant", None) BAI = snakemake.output.get("idx", None) @@ -53,13 +53,12 @@ def get_extension(filename: str) -> str: SORT_PROGRAM = snakemake.params.get("sort_program", "none") SORT_ORDER = snakemake.params.get("sort_order", "coordinate") SORT_EXTRA = snakemake.params.get("sort_extra", "") -SAMTOOLS_OPTS = get_samtools_opts( - snakemake, parse_threads=False, param_name="sort_extra" -) + " " +SAMTOOLS_OPTS = ( + get_samtools_opts(snakemake, parse_threads=False, param_name="sort_extra") + " " +) JAVA_OPTS = get_java_opts(snakemake) - # check # check inputs @@ -82,8 +81,6 @@ def get_extension(filename: str) -> str: f"Identified reference file is {index_prefix} with extensions {index_extensions}" ) -# check outputs -bam_extension = get_extension(BAM) # check params if not isinstance(IS_INTERLEAVED, bool): @@ -108,6 +105,8 @@ def get_extension(filename: str) -> str: ) # check input - output compatibility +bam_extension = get_extension(BAM) + if bam_extension == "cram" and (REF is None or REF_FAI is None): raise ValueError( "Reference file and index are required for CRAM output." @@ -153,19 +152,19 @@ def get_extension(filename: str) -> str: elif all(get_extension(sample) in ("fa", "mfa", "fasta") for sample in SAMPLE): cmd_extra += " -f " -if METRICS: - cmd_extra += f" --met-file {METRICS} " -if UNALIGNED: - cmd_extra += f" --un {UNALIGNED} " -if UNPAIRED: - cmd_extra += f" --al {UNPAIRED} " -if UNCONCORDANT: - cmd_extra += f" --un-conc {UNCONCORDANT} " -if CONCORDANT: - cmd_extra += f" --al-conc {CONCORDANT} " +# if METRICS: +# cmd_extra += f" --met-file {METRICS} " +# if UNALIGNED: +# cmd_extra += f" --un {UNALIGNED} " +# if UNPAIRED: +# cmd_extra += f" --al {UNPAIRED} " +# if UNCONCORDANT: +# cmd_extra += f" --un-conc {UNCONCORDANT} " +# if CONCORDANT: +# cmd_extra += f" --al-conc {CONCORDANT} " -# shell.sort +# sort or not part # Determine which pipe command to use for converting to bam or sorting. match SORT_PROGRAM: @@ -189,7 +188,7 @@ def get_extension(filename: str) -> str: "-T {TMPDIR} " "-o {bam} " ) - + case "picard": PICARD_OPTS = "" if bam_extension == "cram": @@ -203,9 +202,8 @@ def get_extension(filename: str) -> str: "--SORT_ORDER {SORT_ORDER} " "--OUTPUT {BAM} " ) - + case _: - # Correctly assign number of threads according to user request if sort_threads >= 1: SAMTOOLS_OPTS += f"--threads {sort_threads} " if bam_extension == "bam": @@ -223,7 +221,5 @@ def get_extension(filename: str) -> str: "--threads {THREADS} " "{cmd_input} " "-x {cmd_index} " - "{cmd_extra} " - + cmd_output - + " ) {LOG}" + "{cmd_extra} " + cmd_output + " ) {LOG}" ) diff --git a/test_wrappers.py b/test_wrappers.py index d9c21cf7bdc..9b22f7f1a3e 100644 --- a/test_wrappers.py +++ b/test_wrappers.py @@ -2042,40 +2042,33 @@ def test_blast_blastn(run): def test_bowtie2_align(run): run( "bio/bowtie2/align", - ["snakemake", "--cores", "2", "mapped_idx/a.cram", "--use-conda", "-F"], + ["snakemake", "--cores", "1", "mapped_sam/a.sam", "--use-conda", "-F"], ) run( "bio/bowtie2/align", - ["snakemake", "--cores", "2", "mapped_idx/a.bam", "--use-conda", "-F"], + ["snakemake", "--cores", "2", "mapped_se_gz/a.bam", "--use-conda", "-F"], ) run( "bio/bowtie2/align", - ["snakemake", "--cores", "2", "mapped/a.bam", "--use-conda", "-F"], + ["snakemake", "--cores", "2", "mapped_multi/a.bam", "--use-conda", "-F"], ) run( "bio/bowtie2/align", - ["snakemake", "--cores", "2", "mapped_se_gz/a.bam", "--use-conda", "-F"], + ["snakemake", "--cores", "2", "mapped_cram/a.cram", "--use-conda", "-F"], ) + run( + "bio/bowtie2/align", + ["snakemake", "--cores", "2", "mapped_samtools/a.bam", "--use-conda", "-F"], + ) -def test_bowtie2_align_samtools_coordinate_extension(run): - for order in ["coordinate", "queryname"]: - for extension in ["sam", "bam", "cram"]: - for program in ["none", "samtools", "picard"]: - run( - "bio/bowtie2/align", - [ - "snakemake", - "--cores", - "2", - f"mapped_idx/a.{program}_{order}.{extension}", - "--use-conda", - "-F", - ], - ) + run( + "bio/bowtie2/align", + ["snakemake", "--cores", "2", "mapped_picard/a.bam", "--use-conda", "-F"], + ) def test_bowtie2_build(run): From c0935760f5bbd89e65425e883face352efddeec7 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Thu, 19 Dec 2024 22:08:55 +0100 Subject: [PATCH 23/33] refactor: spacing --- bio/bowtie2/align/test/Snakefile | 1 + 1 file changed, 1 insertion(+) diff --git a/bio/bowtie2/align/test/Snakefile b/bio/bowtie2/align/test/Snakefile index a1aa608f855..e75638c4aa1 100644 --- a/bio/bowtie2/align/test/Snakefile +++ b/bio/bowtie2/align/test/Snakefile @@ -71,6 +71,7 @@ use rule test_bowtie2_sam as test_bowtie2_cram with: log: "logs/bowtie2/samtools_cram_{sample}.log", + use rule test_bowtie2_sam as test_bowtie2_sort_samtools with: output: "mapped_samtools/{sample}.bam", From dff2d941fe36f9acd63cac0e3d443f9a1e746956 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Thu, 19 Dec 2024 22:10:20 +0100 Subject: [PATCH 24/33] chore: remove useless comment --- bio/bowtie2/align/wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index c88c087e108..f002e7e4de4 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -168,8 +168,8 @@ def get_extension(filename: str) -> str: # Determine which pipe command to use for converting to bam or sorting. match SORT_PROGRAM: + case "samtools": - # Correctly assign number of threads according to user request if sort_threads >= 1: SAMTOOLS_OPTS += f"--threads {sort_threads} " if BAI: From eb499d4daae6e8ec3bec1f0cf66aeefb2d119025 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Thu, 19 Dec 2024 22:15:55 +0100 Subject: [PATCH 25/33] feat: extend output checks --- bio/bowtie2/align/wrapper.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index f002e7e4de4..1f446359e62 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -59,7 +59,7 @@ def get_extension(filename: str) -> str: JAVA_OPTS = get_java_opts(snakemake) -# check +# checks # check inputs if not isinstance(SAMPLE, str) and len(SAMPLE) not in [1, 2]: @@ -82,6 +82,23 @@ def get_extension(filename: str) -> str: ) +# check ouptuts +bam_extension = get_extension(BAM) +bai_extension = get_extension(BAI) if BAI else None + +if bam_extension.lower() not in {"sam", "bam", "cram"}: + raise ValueError( + f"Unrecognized extension for output file: {bam_extension}." + "Valid extensions are 'sam', 'bam' or 'cram'" + ) + +if bai_extension not in {None, "bai", "crai"}: + raise ValueError( + f"Unrecognized extension for index file: {bai_extension}." + "Valid extensions are 'bai' or 'crai'" + ) + + # check params if not isinstance(IS_INTERLEAVED, bool): raise ValueError("params.interleaved must be a boolean") @@ -104,8 +121,8 @@ def get_extension(filename: str) -> str: "one for bowtie2 and one for samtools/picard." ) + # check input - output compatibility -bam_extension = get_extension(BAM) if bam_extension == "cram" and (REF is None or REF_FAI is None): raise ValueError( From 739c0eb1a34d72b8b10896b4ef57cb1ac28e2d3e Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Thu, 19 Dec 2024 22:24:11 +0100 Subject: [PATCH 26/33] chore: remove f-string that are constant --- bio/bowtie2/align/wrapper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index 1f446359e62..0132b35f0cf 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -191,7 +191,7 @@ def get_extension(filename: str) -> str: SAMTOOLS_OPTS += f"--threads {sort_threads} " if BAI: bam = f"{BAM}##idx##{BAI}" - SAMTOOLS_OPTS += f"--write-index " + SAMTOOLS_OPTS += "--write-index " else: bam = BAM if SORT_ORDER == "queryname": @@ -211,7 +211,7 @@ def get_extension(filename: str) -> str: if bam_extension == "cram": PICARD_OPTS += f"--REFERENCE_SEQUENCE {REF} " if BAI: - PICARD_OPTS += f"--CREATE_INDEX true " + PICARD_OPTS += "--CREATE_INDEX true " cmd_output = ( "| picard SortSam {JAVA_OPTS} {SORT_EXTRA} " "--INPUT /dev/stdin " From 0d0344864cd9d4c03a62352a27adb04210048ba4 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Thu, 19 Dec 2024 22:32:12 +0100 Subject: [PATCH 27/33] feat: extend threads checks --- bio/bowtie2/align/wrapper.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index 0132b35f0cf..26accdcd385 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -44,7 +44,6 @@ def get_extension(filename: str) -> str: # threads THREADS = snakemake.threads -sort_threads = snakemake.threads - 1 # params @@ -98,6 +97,13 @@ def get_extension(filename: str) -> str: "Valid extensions are 'bai' or 'crai'" ) +# check threads +if THREADS == 1 and SORT_PROGRAM != "none": + raise ValueError( + "Not enough threads requested. This wrapper requires at least two threads: " + "one for bowtie2 and one for samtools/picard." + ) + # check params if not isinstance(IS_INTERLEAVED, bool): @@ -156,6 +162,7 @@ def get_extension(filename: str) -> str: cmd_index = index_prefix cmd_threads = THREADS +sort_threads = snakemake.threads - 1 # extra part From a2e650468060f591b1187f3e0abaa953d7dde491 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Thu, 19 Dec 2024 22:35:20 +0100 Subject: [PATCH 28/33] refactor: coderabbit suggestions --- bio/bowtie2/align/wrapper.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index 26accdcd385..08b6593a6f1 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -62,7 +62,10 @@ def get_extension(filename: str) -> str: # check inputs if not isinstance(SAMPLE, str) and len(SAMPLE) not in [1, 2]: - raise ValueError("input must have 1 (single-end) or 2 (paired-end) elements") + raise ValueError( + "Input must have 1 (single-end) or 2 (paired-end) elements, " + f"got {len(SAMPLE)} elements" + ) REQUIRED_IDX = {".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2", ".rev.1.bt2", ".rev.2.bt2"} @@ -117,8 +120,8 @@ def get_extension(filename: str) -> str: if SORT_PROGRAM not in {"none", "samtools", "picard"}: raise ValueError( - f"Unexpected value for sort_program ({SORT_PROGRAM})" - "Valid values are 'none', 'samtools' or 'picard'" + f"Invalid sort_program '{SORT_PROGRAM}'. " + "Valid values are: 'none', 'samtools' or 'picard'" ) if SORT_PROGRAM != "none" and THREADS <= 1: @@ -153,10 +156,7 @@ def get_extension(filename: str) -> str: if get_extension(SAMPLE[0]) in ("bam", "sam"): cmd_input = f"-b {SAMPLE}" else: - if IS_INTERLEAVED: - cmd_input = f"--interleaved {SAMPLE}" - else: - cmd_input = f"-U {SAMPLE}" + cmd_input = f"--interleaved {SAMPLE}" if IS_INTERLEAVED else f"-U {SAMPLE}" else: cmd_input = f"-1 {SAMPLE[0]} -2 {SAMPLE[1]}" From 3bdc418f52e48e542b2761bad5e7a710ba63f85d Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Thu, 19 Dec 2024 22:49:43 +0100 Subject: [PATCH 29/33] refactor: consolidate checks --- bio/bowtie2/align/wrapper.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index 08b6593a6f1..5ac312689f3 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -100,13 +100,6 @@ def get_extension(filename: str) -> str: "Valid extensions are 'bai' or 'crai'" ) -# check threads -if THREADS == 1 and SORT_PROGRAM != "none": - raise ValueError( - "Not enough threads requested. This wrapper requires at least two threads: " - "one for bowtie2 and one for samtools/picard." - ) - # check params if not isinstance(IS_INTERLEAVED, bool): @@ -124,7 +117,7 @@ def get_extension(filename: str) -> str: "Valid values are: 'none', 'samtools' or 'picard'" ) -if SORT_PROGRAM != "none" and THREADS <= 1: +if SORT_PROGRAM != "none" and THREADS < 2: raise ValueError( "Not enough threads requested. This wrapper requires at least two threads: " "one for bowtie2 and one for samtools/picard." @@ -194,8 +187,7 @@ def get_extension(filename: str) -> str: match SORT_PROGRAM: case "samtools": - if sort_threads >= 1: - SAMTOOLS_OPTS += f"--threads {sort_threads} " + SAMTOOLS_OPTS += f"--threads {sort_threads} " if BAI: bam = f"{BAM}##idx##{BAI}" SAMTOOLS_OPTS += "--write-index " @@ -233,7 +225,12 @@ def get_extension(filename: str) -> str: if bam_extension == "bam": cmd_output = f"| samtools view {SAMTOOLS_OPTS} --output {BAM}" elif bam_extension == "cram": - cmd_output = f"| samtools view {SAMTOOLS_OPTS} --output {BAM} --output-fmt CRAM --reference {REF}" + cmd_output = ( + f"| samtools view {SAMTOOLS_OPTS} " + f"--output {BAM} " + "--output-fmt CRAM " + f"--reference {REF}" + ) else: cmd_output = "> {BAM} " From 949cfde2180bd57b870a18284b217b0f7bbf8fb1 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Fri, 20 Dec 2024 12:14:39 +0100 Subject: [PATCH 30/33] fix: fix picard-slim and re-pin everything --- .../align/environment.linux-64.pin.txt | 43 ++++++++----------- bio/bowtie2/align/environment.yaml | 2 +- 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/bio/bowtie2/align/environment.linux-64.pin.txt b/bio/bowtie2/align/environment.linux-64.pin.txt index 26858905546..208d5ee4af3 100644 --- a/bio/bowtie2/align/environment.linux-64.pin.txt +++ b/bio/bowtie2/align/environment.linux-64.pin.txt @@ -4,7 +4,7 @@ # created-by: conda 24.9.2 @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.8.30-hbcca054_0.conda#c27d1c142233b5bc9ca570c6e2e0c244 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.12.14-hbcca054_0.conda#720523eb0d6a9b0f6120c16b2aa4e7de https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb @@ -18,7 +18,8 @@ https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2# https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.2.0-h77fa898_1.conda#3cb76c3f10d3bc7f1105b2fc9db984df https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.13-hb9d3cd8_0.conda#ae1370588aa6a5157c34c73e9bbb36a0 -https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.3-hb9d3cd8_1.conda#ee228789a85f961d14567252a03e725f +https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.4-hb9d3cd8_0.conda#e2775acf57efd5af15b8e3d1d74d72d3 +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.22-hb9d3cd8_0.conda#b422943d5d772b7cc858b36ad2a92db5 https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.4-h5888daf_0.conda#db833e03127376d461e1e13e76f09b6c https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_1.conda#e39480b9ca41323497b05492a63bc35b https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.6.3-hb9d3cd8_1.conda#2ecf2f1c7e4e21fcfe6423a51a992d84 @@ -26,22 +27,19 @@ https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-hc0a3c3a_1.cond https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8 https://conda.anaconda.org/conda-forge/linux-64/openssl-3.4.0-hb9d3cd8_0.conda#23cc74f77eb99315c0360ec3533147a9 https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-hb9d3cd8_1002.conda#b3c17d95b5a10c6e64a21fa17573e70e -https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hb9d3cd8_1.conda#19608a9656912805b2b9a2f6bd257b04 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hb9d3cd8_1.conda#77cbc488235ebbaab2b6e912d3934bae +https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.2-hb9d3cd8_0.conda#fb901ff28063514abb6046c9ec2c4a45 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.12-hb9d3cd8_0.conda#f6ebe2cb3f82ba6c057dde5d9debe4f7 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxdmcp-1.1.5-hb9d3cd8_0.conda#8035c64cb77ed555e3f150b7b3972480 -https://conda.anaconda.org/conda-forge/linux-64/xorg-xorgproto-2024.1-hb9d3cd8_1.conda#7c21106b851ec72c037b162c216d8f05 https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553 https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.2-hd590300_0.conda#3bf7b9fd5a7136126e0234db4b87c8b6 https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.21-h4bc722e_0.conda#36ce76665bf67f5aac36be7a0d21b7f3 https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055 https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-hd590300_2.conda#d66573916ffcf376178462f1b61c941e https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-3.0.0-hd590300_1.conda#ea25936bb4080d843790b586850f82b8 -https://conda.anaconda.org/conda-forge/linux-64/liblzma-devel-5.6.3-hb9d3cd8_1.conda#cc4687e1814ed459f3bd6d8e05251ab2 https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-h4bc722e_0.conda#aeb98fdeb2e8f25d43ef71fbacbeec80 https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.44-hadc24fc_0.conda#f4cc49d7aa68316213e4b12be35308d1 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.47.0-hadc24fc_1.conda#b6f02b52a174e612e89548f4663ce56a +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.47.2-hee588c1_0.conda#b58da17db24b6e08bcbf8fed2fb8c915 https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.1-hf672d98_0.conda#be2de152d8073ef1c01b7728475f2fe7 https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.2.0-h4852527_1.conda#8371ac6457591af2cf6159439c1fd051 https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b @@ -49,9 +47,8 @@ https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.4.0-hd590300_0.co https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.17.0-h8a09558_0.conda#92ed62436b625154323d40d5f2f11dd7 https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-he02047a_1.conda#70caf8bb6cf39a0b6b7efc885f51c0fe +https://conda.anaconda.org/conda-forge/linux-64/pixman-0.44.2-h29eaf8c_0.conda#5e2a7acfa2c24188af39e7944e1b3604 https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc -https://conda.anaconda.org/conda-forge/linux-64/xz-gpl-tools-5.6.3-hbcc6ac9_1.conda#f529917bab7862aaad6867bf2ea47a99 -https://conda.anaconda.org/conda-forge/linux-64/xz-tools-5.6.3-hb9d3cd8_1.conda#de3f31a6eed01bc2b8c7dcad07ad9034 https://conda.anaconda.org/conda-forge/linux-64/zlib-1.3.1-hb9d3cd8_2.conda#c9f075ab2f33b3bbee9e62d4ad0a6cd8 https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h59595ed_1003.conda#f87c7b7c2cb45f323ffbce941c78ab7c @@ -61,33 +58,31 @@ https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2. https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.64.0-h161d5f1_0.conda#19e57602824042dfd0446292ef90488b https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.44-hba22ea6_2.conda#df359c09c41cd186fffb93a2d87aa6f5 https://conda.anaconda.org/conda-forge/linux-64/perl-5.32.1-7_hd590300_perl5.conda#f2cfec9406850991f4e3d960cc9e3321 -https://conda.anaconda.org/conda-forge/linux-64/pixman-0.43.2-h59595ed_0.conda#71004cbf7924e19c02746ccde9fd7123 https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-he73a12e_1.conda#05a8ea5f446de33006171a7afe6ae857 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.5-he73a12e_0.conda#4c3e9fab69804ec6077697922d70c6e2 https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.10-h4f16b4b_1.conda#125f34a17d7b4bea418a83904ea82ea6 -https://conda.anaconda.org/conda-forge/linux-64/xz-5.6.3-hbcc6ac9_1.conda#62aae173382a8aae284726353c6a6a24 https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.6-ha6fb4c9_0.conda#4d056880988120e29d75bfff282e0f45 https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.15.0-h7e30c49_1.conda#8f5b0b297b59e1ac160ad4beec99dbee https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368 https://conda.anaconda.org/conda-forge/linux-64/libglib-2.82.2-h2ff4ddf_0.conda#13e8e54035ddd2b91875ba399f0f7c04 -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.0-h6565414_0.conda#80eaf80d84668fa5620ac9ec1b4bf56f -https://conda.anaconda.org/conda-forge/linux-64/python-3.13.0-h9ebbce0_101_cp313.conda#f4fea9d5bb3f2e61a39950a7ab70ee4e +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.7.0-hc4654cb_2.conda#be54fb40ea32e8fe9dbaa94d4528b57e +https://conda.anaconda.org/conda-forge/linux-64/python-3.13.1-ha99a958_102_cp313.conda#6e7535f1d1faf524e9210d2689b3149b https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.6-hb9d3cd8_0.conda#febbab7d15033c913d53c7a2c102309d https://conda.anaconda.org/conda-forge/linux-64/xorg-libxfixes-6.0.1-hb9d3cd8_0.conda#4bdb303603e9821baf5fe5fdff1dc8f8 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hb9d3cd8_1.conda#a7a49a8b85122b49214798321e2e96b4 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.12-hb9d3cd8_0.conda#96d57aba173e878a2089d5638016dc5e https://conda.anaconda.org/conda-forge/linux-64/xorg-libxt-1.3.1-hb9d3cd8_0.conda#279b0de5f6ba95457190a1c459a64e31 -https://conda.anaconda.org/bioconda/linux-64/bowtie2-2.5.4-h7071971_4.tar.bz2#69822858766e6c8b12ae90d78d54d8ea -https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.0-hebfffa5_3.conda#fceaedf1cdbcb02df9699a0d9b005292 +https://conda.anaconda.org/bioconda/linux-64/bowtie2-2.5.4-he96a11b_5.tar.bz2#16af1b2c107e07dde4a6d98e44e72cd3 +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.18.2-h3394656_1.conda#b34c2833a1f56db610aeb27f206d800d https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.16-hb7c19ff_0.conda#51bb7010fc86f70eee639b4bb7a894f5 https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 -https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.10.1-hbbe4b11_0.conda#6e801c50a40301f6978c53976917b277 -https://conda.anaconda.org/conda-forge/noarch/pip-24.3.1-pyh145f28c_0.conda#ca3afe2d7b893a8c8cdf489d30a2b1a3 +https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.11.1-h332b0f4_0.conda#2b3e0081006dc21e8bf53a91c83a055c +https://conda.anaconda.org/conda-forge/noarch/pip-24.3.1-pyh145f28c_2.conda#76601b0ccfe1fe13a21a5f8813cb38de https://conda.anaconda.org/bioconda/noarch/snakemake-wrapper-utils-0.6.2-pyhdfd78af_0.tar.bz2#fd8759bbd04116eace828c4fab906096 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxi-1.8.2-hb9d3cd8_0.conda#17dcc85db3c7886650b8908b183d6876 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrandr-1.5.4-hb9d3cd8_0.conda#2de7f99d6581a4a7adbff607b5c278ca -https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-9.0.0-hda332d3_1.conda#76b32dcf243444aea9c6b804bcfa40b8 -https://conda.anaconda.org/bioconda/linux-64/htslib-1.21-h5efdd21_0.tar.bz2#06b995dc2244c024b45bbb3e53ae2f27 +https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-10.1.0-h0b3b770_0.conda#ab1d7d56034814f4c3ed9f69f8c68806 +https://conda.anaconda.org/bioconda/linux-64/htslib-1.21-h566b1c6_1.tar.bz2#944598fba531a668e8fafea92ca39bb4 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxtst-1.2.5-hb9d3cd8_3.conda#7bbe9a0cc0df0ac5f5a8ad6d6a11af2f -https://conda.anaconda.org/conda-forge/linux-64/openjdk-23.0.1-h4c11d01_0.conda#c40dda22ec391102c2bc24dd92f1f663 -https://conda.anaconda.org/bioconda/linux-64/samtools-1.21-h50ea8bc_0.tar.bz2#4a7fe11223f61cb2d950ed54e20c12ce +https://conda.anaconda.org/conda-forge/linux-64/openjdk-23.0.1-h68779a4_1.conda#eae06cb5a47244d3f4659f366015a85b +https://conda.anaconda.org/bioconda/linux-64/samtools-1.21-h96c455f_1.tar.bz2#0ff9d5d48561198378ad3cb34ce830bf https://conda.anaconda.org/bioconda/noarch/picard-slim-3.3.0-hdfd78af_0.tar.bz2#fab4a4639fd22cd155b740fce3064944 diff --git a/bio/bowtie2/align/environment.yaml b/bio/bowtie2/align/environment.yaml index 5bd6909f70f..9f70f26109a 100644 --- a/bio/bowtie2/align/environment.yaml +++ b/bio/bowtie2/align/environment.yaml @@ -4,6 +4,6 @@ channels: - nodefaults dependencies: - bowtie2 =2.5 - - picard-slim =3.3 + - picard-slim =3.3.0 - samtools =1.21 - snakemake-wrapper-utils =0.6 From 8242918ec7c7e431203d57ca0dc345fdce76d6f5 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Fri, 20 Dec 2024 12:36:14 +0100 Subject: [PATCH 31/33] fix: pin everything but samtools --- bio/bowtie2/align/environment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bio/bowtie2/align/environment.yaml b/bio/bowtie2/align/environment.yaml index 9f70f26109a..7dc5dc02174 100644 --- a/bio/bowtie2/align/environment.yaml +++ b/bio/bowtie2/align/environment.yaml @@ -3,7 +3,7 @@ channels: - bioconda - nodefaults dependencies: - - bowtie2 =2.5 + - bowtie2 =2.5.4 - picard-slim =3.3.0 - samtools =1.21 - - snakemake-wrapper-utils =0.6 + - snakemake-wrapper-utils =0.6.2 From a93881ae8494db85285b39073731345e8b0f6a4c Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Fri, 20 Dec 2024 13:35:17 +0100 Subject: [PATCH 32/33] fix: --with-header --- bio/bowtie2/align/wrapper.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index 5ac312689f3..d094afb0f3d 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -223,10 +223,18 @@ def get_extension(filename: str) -> str: if sort_threads >= 1: SAMTOOLS_OPTS += f"--threads {sort_threads} " if bam_extension == "bam": - cmd_output = f"| samtools view {SAMTOOLS_OPTS} --output {BAM}" + cmd_output = ( + f"| samtools view " + "--with-header " + f"{SAMTOOLS_OPTS} " + "--output-fmt BAM " + f"--output {BAM}" + ) elif bam_extension == "cram": cmd_output = ( - f"| samtools view {SAMTOOLS_OPTS} " + "| samtools view " + "--with-header " + f"{SAMTOOLS_OPTS} " f"--output {BAM} " "--output-fmt CRAM " f"--reference {REF}" From 7ba5b461e0efa79f93a031016087cf3a01ed5658 Mon Sep 17 00:00:00 2001 From: Jorge Langa <6546145+jlanga@users.noreply.github.com> Date: Fri, 20 Dec 2024 15:56:59 +0100 Subject: [PATCH 33/33] fix: use coderrabit suggestions --- bio/bowtie2/align/wrapper.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/bio/bowtie2/align/wrapper.py b/bio/bowtie2/align/wrapper.py index d094afb0f3d..e49fd621326 100644 --- a/bio/bowtie2/align/wrapper.py +++ b/bio/bowtie2/align/wrapper.py @@ -18,9 +18,11 @@ def get_extension(filename: str) -> str: Return file format since Bowtie2 reads files that could be gzip'ed (extension: .gz) or bzip2'ed (extension: .bz2). """ - if filename.endswith((".gz", ".bz2")): - return filename.split(".")[-2].lower() - return filename.split(".")[-1].lower() + filename = filename.lower() + base, ext = path.splitext(filename) + if ext in (".gz", ".bz2"): + return path.splitext(base)[1][1:] # Remove leading dot + return ext[1:] # Remove leading dot # input @@ -31,6 +33,8 @@ def get_extension(filename: str) -> str: # output BAM = str(snakemake.output[0]) +# TODO: These outputs are temporarily disabled due to complexity with SE/PE handling +# They can be re-enabled once we implement proper SE/PE output handling # METRICS = snakemake.output.get("metrics", None) # UNALIGNED = snakemake.output.get("unaligned", None) # UNPAIRED = snakemake.output.get("unpaired", None) @@ -64,7 +68,7 @@ def get_extension(filename: str) -> str: if not isinstance(SAMPLE, str) and len(SAMPLE) not in [1, 2]: raise ValueError( "Input must have 1 (single-end) or 2 (paired-end) elements, " - f"got {len(SAMPLE)} elements" + f"got {len(SAMPLE)} elements: {SAMPLE}" ) REQUIRED_IDX = {".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2", ".rev.1.bt2", ".rev.2.bt2"} @@ -90,14 +94,14 @@ def get_extension(filename: str) -> str: if bam_extension.lower() not in {"sam", "bam", "cram"}: raise ValueError( - f"Unrecognized extension for output file: {bam_extension}." - "Valid extensions are 'sam', 'bam' or 'cram'" + f"Unrecognized extension for output file: {bam_extension}. " + "Valid extensions are: 'sam', 'bam' or 'cram'" ) if bai_extension not in {None, "bai", "crai"}: raise ValueError( - f"Unrecognized extension for index file: {bai_extension}." - "Valid extensions are 'bai' or 'crai'" + f"Unrecognized extension for index file: {bai_extension}. " + "Valid extensions are: 'bai' or 'crai'" ) @@ -225,18 +229,18 @@ def get_extension(filename: str) -> str: if bam_extension == "bam": cmd_output = ( f"| samtools view " - "--with-header " + f"--with-header " f"{SAMTOOLS_OPTS} " - "--output-fmt BAM " + f"--output-fmt BAM " f"--output {BAM}" ) elif bam_extension == "cram": cmd_output = ( - "| samtools view " - "--with-header " + f"| samtools view " + f"--with-header " f"{SAMTOOLS_OPTS} " f"--output {BAM} " - "--output-fmt CRAM " + f"--output-fmt CRAM " f"--reference {REF}" ) else: