#################
# Data used
#################

# Escherichia coli sequences with the following accession numbers:
# SRR341549, SRR341551, SRR341552, SRR341555, SRR341557,
# SRR341559, SRR341561, SRR341563, SRR341565, SRR341567,
# SRR341569, SRR341571, SRR341580,
# were downloaded from SRA: https://www.ncbi.nlm.nih.gov/sra.

# cgMLST alleles for E. coli were downloaded from Enterobase [online],
# Enterobase.warwick.ac.uk: http://enterobase.warwick.ac.uk
# [Accessed 18 January 2018].

# The ResFinder database used is attached as "ResFinder.fasta"

# Artificial reads from the ResFinder database were created with 
# chop_DB (attached), executed with the following parameters.

# Compile chop_DB from the attached C source (optimisation level -O3)
gcc -O3 -o chop_DB chop_DB.c

# Create 100 bp single end reads from the ResFinder sequences
# NOTE(review): chop_DB is called without a path, so the freshly built
# binary has to be reachable through PATH (or be called as ./chop_DB) - confirm.
chop_DB -i ResFinder.fasta -o output/

# Create 100 bp paired end reads, with an insert size of 205
# NOTE(review): how "-pair 50" relates to the stated insert size is not
# visible here - check chop_DB.c.
chop_DB -i ResFinder.fasta -o output/ -pair 50


#################
# KMA
#################

# Indexing of databases, ResFinder database as example.
# "-o ResFinder" is the name the database is referred to with "-t_db" below.
kma_index -i ResFinder.fasta -o ResFinder

# Load the database into shared memory
kma_shm -t_db ResFinder

# Take the database down from shared memory afterwards,
# i.e. once every mapping that uses "-shm" has finished
kma_shm -t_db ResFinder -destroy

# Mapping against the ResFinder database held in shared memory (-shm).
# First command: a single read file.
# Second command: every file matching sampleX_*.fq.gz, presumably the two
# mate files of a paired end sample - confirm.
kma -i input/sampleX.fq.gz -o output/sampleX -t_db ResFinder -shm -and -1t1
kma -i input/sampleX_*.fq.gz -o output/sampleX -t_db ResFinder -shm -and -1t1

# Mapping against the cgMLST database.
# "cgMLST" stands for the name given to "kma_index -o" when the cgMLST
# alleles were indexed; adjust it if another name was used.
# With -shm the database has to be loaded first: kma_shm -t_db cgMLST
kma -i input/sampleX_*.fq.gz -o output/sampleX -t_db cgMLST \
-shm -and -1t1 -mem_mode

#################
# SRST2 v0.1.8
#################

# Requires: 
# perl/5.20.2
# anaconda2/4.0.0
# samtools/0.1.18 (does not work with other versions)
# bowtie2/2.2.4
# ncbi-blast/2.2.26 (required by CD-Hit)
# cd-hit/4.6.8

# Preparing a database for SRST2, ResFinder database as example

# 1) cluster the database with CD-HIT, keeping its log in a file
cd-hit-est -i ResFinder.fasta -o ResFinder_cdhit90 -d 0 \
    > ResFinder_cdhit90.stdout

# 2) convert the CD-HIT cluster file to csv
cdhit_to_csv.py \
    --cluster_file ResFinder_cdhit90.clstr \
    --infasta ResFinder.fasta \
    --outfile ResFinder.csv

# 3) link the csv to the genes, giving the clustered gene database
csv_to_gene_db.py -t ResFinder.csv -o ResFinder_clust.fasta \
    -f ResFinder.fasta -c 4

# 4) set up the bowtie2 index by running SRST2 on the new gene database
srst2.py --input_se sampleX.fq.gz --output output/sampleX --log \
    --gene_db ResFinder_clust.fasta

# The same procedure is repeated for the cgMLST database, 
# but that one may take some days to finish.

# Run SRST2 on single end reads
srst2.py --input_se sampleX.fq.gz --output output/sampleX --log \
--gene_db ResFinder_clust.fasta

# Run SRST2 on paired end reads
srst2.py --input_pe sampleX_1.fq.gz sampleX_2.fq.gz --output output/sampleX \
--log --gene_db ResFinder_clust.fasta
# OR, naming the mate designators explicitly.
# --forward / --reverse take the file name suffix that marks forward and
# reverse reads (not the read files), so the files still go to --input_pe.
srst2.py --input_pe sampleX_1.fq.gz sampleX_2.fq.gz \
--forward _1 --reverse _2 \
--output output/sampleX --log --gene_db ResFinder_clust.fasta

# The options "--min_depth 0.0 --min_coverage 1.0" were added
# when analyzing the artificial dataset.

#################
# MGmapper 2.7
#################

# Requires: 
# perl/5.20.2
# bwa/0.7.15
# samtools/1.5
# bedtools/2.26.0

# The databases are indexed as for BWA-MEM.
# After indexing, the paths and names of the databases
# should be updated in the file: "MGmapper_databases.txt"

# Mapping single end reads
MGmapper_SE.pl -P MGmapper_databases.txt -R -C 2 -i sampleX.fq.gz \
-d output/sampleX

# Mapping paired end reads: the paired end script is used, with the
# forward reads given to -i and the reverse reads given to -j
MGmapper_PE.pl -P MGmapper_databases.txt -R -C 2 -i sampleX_1.fq.gz \
-j sampleX_2.fq.gz -d output/sampleX


#################
# BWA-MEM v0.7.15
#################

# Requires
# perl/5.20.2
# samtools/1.5

# indexing of database
bwa index ResFinder.fasta

# mapping single end reads
bwa mem ResFinder.fasta sampleX.fq.gz | \
samtools view -Sb - > output/sampleX.bam

# mapping paired end reads, artificial dataset
# The mates are given as two separate files, so -p is not used: with -p
# bwa mem reads the first file as interleaved pairs and ignores the second.
bwa mem ResFinder.fasta sampleX_1.fq.gz sampleX_2.fq.gz | \
samtools view -Sb - > output/sampleX.bam

# mapping paired end reads, e. coli dataset
# Each mate file is flattened to one tab-separated record per line
# (paste - - - -), the two files are joined record by record, and tr turns
# the tabs back into newlines. The result is an interleaved FASTQ stream,
# which bwa mem reads from stdin with -p.
# Requires bash: "<(" (process substitution) must be written without a space.
paste <(gunzip -c sampleX_1.fq.gz | paste - - - -) \
<(gunzip -c sampleX_2.fq.gz | paste - - - -) | tr '\t' '\n' | \
bwa mem -p ResFinder.fasta - | samtools view -Sb - > output/sampleX.bam

# accept all mappings (-a), paired end reads, e. coli dataset
paste <(gunzip -c sampleX_1.fq.gz | paste - - - -) \
<(gunzip -c sampleX_2.fq.gz | paste - - - -) | tr '\t' '\n' | \
bwa mem -a -p ResFinder.fasta - | samtools view -Sb - > output/sampleX.bam


#################
# Bowtie2 v2.2.4
#################

# Requires
# perl/5.20.2
# samtools/1.5

# Build the Bowtie2 index of the database under the name "ResFinder"
bowtie2-build ResFinder.fasta ResFinder

# Note that indexing the cgMLST database may need several cores
# (e.g. "--threads 28"); otherwise the indexing can take several days.

# Single end reads
bowtie2 -x ResFinder -U sampleX.fq.gz | \
    samtools view -Sb - > output/sampleX.bam

# Paired end reads
bowtie2 -x ResFinder -1 sampleX_1.fq.gz -2 sampleX_2.fq.gz | \
    samtools view -Sb - > output/sampleX.bam

# Paired end reads, accepting all mappings (-a)
bowtie2 -a -x ResFinder -1 sampleX_1.fq.gz -2 sampleX_2.fq.gz | \
    samtools view -Sb - > output/sampleX.bam

#################
# Minimap2 v2.6
#################

# Requires
# perl/5.20.2
# samtools/1.5

# Build the Minimap2 index of the database, written to "ResFinder"
minimap2 -d ResFinder ResFinder.fasta

# Single end reads
minimap2 -ax sr ResFinder sampleX.fq.gz \
    | samtools view -Sb - > output/sampleX.bam

# Paired end reads
minimap2 -ax sr ResFinder sampleX_1.fq.gz sampleX_2.fq.gz \
    | samtools view -Sb - > output/sampleX.bam

# Accepting all / several mappings;
# tried with -N in [1000; 1000000] and -p in [0.5; 1.0]
minimap2 -ax sr ResFinder sampleX_1.fq.gz sampleX_2.fq.gz -N 10000 -p 0.8 \
    | samtools view -Sb - > output/sampleX.bam

#################
# samtools bedtools
#################

# Requires
# perl/5.20.2
# samtools/1.5
# bedtools/2.26.0
# bedToTemplate.pl (attached)

# Accept all mapping reads with a mapping quality above [10, 20, 30].
# "[10, 20, 30]" is presumably a placeholder for the thresholds that were
# tried: put a single value after -q for each run.
# NOTE(review): confirm that "-d" is the intended samtools view option for
# samtools/1.5; the output is piped on to samtools sort and bedtools.
samtools view -d -F 4 -q [10, 20, 30] sampleX.bam | samtools sort \
| bedtools genomecov -ibam stdin | bedToTemplate.pl > sampleX.b2t

# Accept all mapping, properly paired reads (-f 2)
# with a mapping quality above [10, 20, 30]
# NOTE(review): this writes to the same sampleX.b2t as the command above.
samtools view -d -F 4 -f 2 -q [10, 20, 30] sampleX.bam | samtools sort \
| bedtools genomecov -ibam stdin | bedToTemplate.pl > sampleX.b2t


#################
# Salmon v0.9.1
#################

# Example with the ResFinder database,
# mapped with BWA-MEM and Bowtie2 with "-a" option enabled
salmon quant -t ResFinder.fasta -a sampleX.bam -o sampleX_salmon -l A

