
phap - Phage Host Analysis Pipeline

A snakemake workflow that wraps various phage-host prediction tools.


  • Uses Singularity containers for the execution of all tools. When possible (i.e. when the image is not larger than a few GB), tools and their dependencies are bundled in the same container, so you do not need to fetch models or any other external databases yourself.
  • Calculates the Last Common Ancestor (LCA) of all tools' predictions per contig.

Current tools

| Tool (source) | Publication/Preprint | Comments |
| ------------- | -------------------- | -------- |
| HTP | Gałan W. et al., 2019 | ok |
| RaFAH | Coutinho F. H. et al., 2020 | ok |
| vHULK | Amgarten D. et al., 2020 | ok |
| VirHostMatcher-Net | Wang W. et al., 2020 | ok |
| WIsH | Galiez C. et al., 2017 | ok (unnecessary?) |

Installation

Software dependencies

To run the workflow you will need

  • snakemake >= 5 (developed with 5.30.1)
  • singularity >= 3.6 (developed with 3.6.3)

The following Python packages must also be installed and available in the execution environment:

  • biopython >= 1.78 (developed with 1.78)
  • ete3 >= 3.1.2 (developed with 3.1.2)

Conda environment

It is recommended to use a conda environment. The file environment.txt can be used to recreate the complete environment used during development.

The provided environment.txt contains an explicit list of all packages, produced with

conda list -n phap --explicit > environment.txt

This ensures all packages are exactly the same versions/builds, minimizing the risk of running into dependency issues.

To get a working environment

# Clone this repo and get in there
$ git clone https://git.science.uu.nl/papanikos/phap.git
$ cd phap

# Note the long notation --file flag; -f will not work.
$ conda create -n phap --file=environment.txt

# Activate it - use the name you gave above, if it is different
$ conda activate phap

# The (phap) prefix shows we have activated it
# Check the snakemake version
(phap) $ snakemake --version
5.30.1

Data dependencies

  • RaFAH, vHULK, HTP

For these tools there is no need to pre-download or set up anything - all data and software dependencies required for running them are bundled within their respective singularity images.

  • VirHostMatcher-Net, WIsH

Databases and models must be downloaded from the VirHostMatcher-Net data repo (see here). WIsH models for the 62,493 host genomes used in their paper are also provided there and are used here for the WIsH predictions.
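A minimal sketch of staging these data (the archive name data.tar.gz below is hypothetical; substitute the actual file(s) from the data repo):

# Hypothetical archive name - use the real download from the data repo
$ mkdir -p /path/to/databases
$ tar -xzf data.tar.gz -C /path/to/databases
# The extracted data directory is what gets bound into the containers at runtime (see Usage)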

NCBI Taxonomy

The ete3.NCBITaxa class is used to get taxonomy information and calculate the LCA of all predictions, when possible. This requires a taxa.sqlite file to be available either in its default location ( $HOME/.etetoolkit/taxa.sqlite ) or at a path provided in the config. See more about that on http://etetoolkit.org/docs/latest/tutorial/tutorial_ncbitaxonomy.html
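If taxa.sqlite is not present yet, instantiating NCBITaxa once will download the NCBI taxdump and build the database in its default location (requires an internet connection):

(phap)$ python -c "from ete3 import NCBITaxa; NCBITaxa()"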

Singularity containers

Definition files, along with documentation of how to use them to build the containers are in resources/singularity.

The pre-built containers are all available through the standard Singularity library and are pulled at runtime (or used from cache).

Alternatively, you can pull all .sif files from the cloud, store them locally and use them in offline mode (see below).
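For example, to pre-pull a single image (the URI and output name below are hypothetical; the actual URIs are documented in resources/singularity):

# Hypothetical library URI - see resources/singularity for the real ones
$ singularity pull rafah.sif library://<user>/phap/rafah:latest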

Configuration

The config_template.yaml file provided with this repo has all available configurable options. Short explanations are provided as commented blocks for each option.

A configuration for the workflow must be available as a config.yaml within the config directory. A separate my_config.yaml overriding the options in the default config.yaml can be supplied at runtime, e.g.

$ snakemake --configfile=path/to/my_config.yaml \
<rest of snakemake options>

You can either copy config_template.yaml to config.yaml and make your edits there, or edit the template directly and rename it to config.yaml.

All fields included in the template must be specified, unless otherwise stated in the comment.
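For illustration only, a stripped-down config might look like the following. The samplesheet option is real (see below); the data directory key name is a hypothetical placeholder - check config_template.yaml for the actual option names:

$ cat config/config.yaml
samplesheet: samples.tsv
# Hypothetical key name - see config_template.yaml for the real one
data_dir: /path/to/databases/data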

Input data

The tools wrapped in this workflow expect phage sequences as input, so make sure the input sequences you want to analyze correspond to phage genomes/contigs (or at least viruses).

Any valid fasta file will probably work as input, but the garbage-in, garbage-out (GIGO) principle applies.

A separate workflow to identify phage/viral genomes/contigs is What the Phage.

The current workflow can handle multiple samples. For each sample, all viral contigs to be analyzed should be provided as a single multifasta (can be gzipped). A mapping between sample ids and their corresponding fasta file is provided as a samplesheet (see below).

Sample sheet

You must define a samplesheet with two tab-separated (\t) columns. The header line must contain the two field names sample and fasta. Values in the sample column must be unique and are used as sample identifiers. Their corresponding fasta values must be valid paths to (multi)fasta files with the phage sequences for that sample.

An example

$ cat samples.tsv
sample	fasta
s01	/path/to/s01.fna
s02	/path/to/another.fna.gz

Note

There is no need for the fasta file name to follow any convention reflecting the sample id. The values in the sample column are the ones to worry about, as these are used as wildcards within the Snakefile.

You can

  • Fill in the location of the samplesheet within the config/config.yaml.
  • Drop the file in the workdir - Attention: it must be named samples.tsv
  • Use snakemake's --config samplesheet=/path/to/my_samples.tsv when executing the workflow.

Usage

A dry-run (always a good idea before each execution)

(phap)$ snakemake -n --use-singularity \
--singularity-args "-B /path/to/databases/data:/data"

Basic:

# From within this directory
# Make sure you have defined a samplesheet
(phap)$ snakemake -p --use-singularity -j16 \
--singularity-args "-B /path/to/databases/data:/data"

where /path/to/databases/data is the directory containing the tables, WIsH models and CRISPR BLAST databases.

  • The -j flag controls the number of jobs (cores) to be run in parallel. Change this according to your setup.
  • The -p flag prints the commands scheduled for execution; you can remove it.
  • Binding the data dir with --singularity-args is required (at least in my tests). You must also provide it as a value in the config.yaml.

Output

All output is stored under a results directory within the main workdir. Results are stored per sample according to the sample ids you provided in the sample sheet. For each sample, results for each tool are stored in directories named after the tool. An example looks like this:

$ tree -L 2 results/A
results/A
├── all_predictions.tsv
├── lca.tsv 
├── htp
│   ├── predictions.tsv
│   └── raw.txt
├── rafah
│   ├── A_CDS.faa
│   ├── A_CDS.fna
│   ├── A_CDS.gff
│   ├── A_CDSxMMSeqs_Clusters
│   ├── A_Genomes.fasta
│   ├── A_Genome_to_Domain_Score_Min_Score_50-Max_evalue_1e-05.tsv
│   ├── A_Ranger_Model_3_Predictions.tsv
│   ├── A_Seq_Info.tsv
│   └── predictions.tsv
├── tmp
│   ├── filtered.fa.gz 
│   ├── genomes
│   └── reflist.txt
├── vhmnet
│   ├── feature_values
│   ├── predictions
│   ├── predictions.tsv
│   └── tmp
├── vhulk
│   ├── predictions.tsv
│   └── results
└── wish
    ├── llikelihood.matrix
    ├── prediction.list
    └── predictions.tsv

Per sample


all_predictions.tsv: Contains the best prediction per contig (rows) for each tool, along with the single value (probability, score, p-value, etc.) each tool uses to quantify its confidence in the prediction.

An example for three genomes:

contig  htp_proba       vhulk_pred      vhulk_score     rafah_pred      rafah_score     vhmnet_pred     vhmnet_score    wish_pred       wish_score
NC_005964.2     0.8464285626352002      None    4.068828        Mycoplasma      0.461   Mycoplasma fermentans   0.9953  Bacteria;Tenericutes;Mollicutes;Mycoplasmatales;Mycoplasmataceae;Mycoplasma;Mycoplasma fermentans;Mycoplasma fermentans MF-I2   -1.2085700000000001
NC_015271.1     0.995161392517451       Escherichia_coli        1.0301523       Salmonella      0.495   Muricauda pacifica      0.9968  Bacteria;Proteobacteria;Gammaproteobacteria;Enterobacterales;Enterobacteriaceae;Raoultella;Raoultella sp. NCTC 9187;Raoultella sp. NCTC 9187       -1.3869200000000002
NC_023719.1     0.9999957241187084      Bacillus        0.0012575098    Bacillus        0.55    Clostridium sp. LS      1.0000  Bacteria;Firmicutes;Clostridia;Clostridiales;Clostridiaceae;Clostridium;Clostridium beijerinckii;Clostridium beijerinckii       -1.29454
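Since all_predictions.tsv is a plain tsv, per-tool results are easy to slice out with standard tools, e.g. the RaFAH prediction and score (columns 1, 5 and 6 in the header above):

$ cut -f1,5,6 results/A/all_predictions.tsv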
lca.tsv: Last Common Ancestor of the per-tool predictions, based on NCBI Taxonomy.

An example for the genomes above:

contig  name    rank    lca
NC_005964.2	Mycoplasma	genus	2093
NC_015271.1	Enterobacteriaceae	family	543
NC_023719.1	Firmicutes	phylum	1239
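The lca column holds the NCBI taxid of the LCA node. Taxids can be translated back to names with ete3, e.g.:

(phap)$ python -c "from ete3 import NCBITaxa; print(NCBITaxa().get_taxid_translator([2093, 543, 1239]))"
# prints something like {2093: 'Mycoplasma', 543: 'Enterobacteriaceae', 1239: 'Firmicutes'}
# (names may differ slightly with newer taxonomy dumps)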
tmp (dir)
  • Directory genomes: Contains one fasta file per input genome
  • File reflist.txt: Holds the paths to all produced per-genome fastas (an intermediate file used to ensure smooth execution)
  • File filtered.fa.gz: Fasta file containing the input sequences > 5000 bp.

Per tool

htp
  • File raw.txt: The raw output of htp per contig
  • File predictions.tsv: Two-column tsv with the contig id and the probability of the host being prokaryotic (i.e. of the virus being a phage).
rafah
  • Files prefixed with <sample_id>_ are RaFAH's raw output
  • predictions.tsv: A selection of the 1st (Contig), 6th (Predicted_Host) and 7th (Predicted_Host_Score) columns from the file <sample_id>_Seq_Info.tsv
vhulk
  • File results.csv: A copy of results/<sample_id>/tmp/genomes/results/results.csv
  • File predictions.tsv: A selection of the 1st (BIN/genome), 10th (final_prediction) and 11th (entropy) columns from file results.csv.
vhmnet
  • Directories feature_values and predictions are the raw output
  • Directory tmp is a temporary dir written by VirHostMatcher-Net for doing its magic.
  • File predictions.tsv contains contig, host taxonomy and scores.
wish
  • Files llikelihood.matrix and prediction.list are the raw output
  • File predictions.tsv has the contig, host taxonomy and log-likelihood scores.

Logs

Logs capturing stdout and stderr during the execution of each rule can be found under workdir/logs/<sample_id>/*.log.
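For example, to quickly spot failed rules for sample s01:

$ grep -il error logs/s01/*.log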