Skip to content
Snippets Groups Projects
Commit 776827fb authored by Nikos Pappas's avatar Nikos Pappas
Browse files

add size filtering rule

parent 7bedfc07
No related branches found
No related tags found
No related merge requests found
......@@ -51,6 +51,7 @@ def collect_prediction_tsvs(wc):
rule all:
input:
expand([
"results/{sample}/tmp/filtered.fa.gz",
"results/{sample}/tmp/reflist.txt",
"results/{sample}/{tool}/predictions.tsv",
"results/{sample}/all_predictions.tsv",
......@@ -61,9 +62,24 @@ rule all:
###########
## RULES ##
###########
rule split_multifasta:
rule size_filter:
    # Filter the sample's multifasta by sequence length with `seqkit seq`
    # and write the result gzip-compressed to the sample's tmp directory.
    #
    # input.multifasta_fp    : resolved per sample by get_sample_fasta
    # output.filtered_fasta  : gzip-compressed fasta with the retained sequences
    # params.min_size        : value passed to seqkit's -m (minimum length) option
    # threads                : value passed to seqkit's -j option
    input:
        multifasta_fp = get_sample_fasta
    output:
        filtered_fasta = "results/{sample}/tmp/filtered.fa.gz"
    threads: 4
    log:
        "logs/{sample}/size_filter.log"
    params:
        min_size = 5000
    shell:
        # The pipeline is grouped in a subshell so that stderr of BOTH seqkit
        # and gzip ends up in the log. A bare trailing `2>{log}` would only
        # redirect stderr of gzip, the last command of the pipeline, and any
        # seqkit diagnostics would be lost.
        "(seqkit seq -g -j {threads} -m {params.min_size} "
        "{input.multifasta_fp} | gzip -c >{output.filtered_fasta}) 2>{log}"
rule split_multifasta:
input:
multifasta_fp = rules.size_filter.output.filtered_fasta
output:
reflist = "results/{sample}/tmp/reflist.txt"
log: "logs/{sample}/split_multifasta.log"
......@@ -109,8 +125,6 @@ rule filter_vhulk:
# RAFAH
rule run_rafah:
input:
# Run vhulk first since it is writing in the input
rules.run_vhulk.output.done_txt,
reflist = rules.split_multifasta.output.reflist,
output:
seq_info = "results/{sample}/rafah/{sample}_Seq_Info.tsv"
......@@ -140,8 +154,6 @@ rule filter_rafah:
# VirHostMatcher-Net
rule run_vhmnet:
input:
# Run vhulk first since it is writing in the input
rules.run_vhulk.output.done_txt,
reflist = rules.split_multifasta.output.reflist,
output:
done = touch("results/{sample}/vhmnet/.done.txt")
......@@ -149,11 +161,10 @@ rule run_vhmnet:
# use it with
# snakemake --singularity-args "-B /path/to/data/:/data" ...
data_dir = "/data",
#data_dir = DATA_DIR,
tmp_dir = "results/{sample}/vhmnet/tmp",
output_dir = "results/{sample}/vhmnet",
fasta_dir = "results/{sample}/tmp/genomes"
threads: 8
threads: 12
container:
"library://papanikos_182/default/vhmnet:0.1"
log:
......@@ -204,7 +215,6 @@ rule run_wish:
# snakemake --singularity-args binds the whole data dir to /data
# see run_vhmnet rule
models_dir = "/data/host_wish_model",
#models_dir = DATA_DIR.joinpath("host_wish_model"),
output_dir = "results/{sample}/wish",
fasta_dir = "results/{sample}/tmp/genomes"
shell:
......@@ -247,7 +257,7 @@ rule run_htp:
pred=$(viruses_classifier \
--classifier svc \
--nucleic_acid dna \
-p ${{f}}) 2>{log.stderr}
-p ${{f}} 2>{log.stderr})
echo -e $contig_id\t$pred >> {output.htp_raw}
done
"""
......@@ -261,7 +271,7 @@ rule process_htp:
ob = "{"
shell:
"""
tail -n +2 results/A/htp/raw.txt | cut -f1 -d','| \
tail -n +2 results/{wildcards.sample}/htp/raw.txt | cut -f1 -d','| \
sed -r "s/ \{params.ob}'phage'\: /\t/" | sort -k1 \
>{output.predictions_tsv}
"""
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment