# the reference genome was indexed using BWA
# compressed fastq files with the sequences were downloaded
# de-compression of files
# quality check using FastQC
# indexing of the sequenced fastq files - it is done, but is it needed?!
# keyfile preparation in Excel
# keyfile saved as tabulated .txt file
# dos2unix for the new keyfile

# STEP 1 - GBSSeqToTagDBPlugin: import the sequences as tags into the database.
# Layout of the command below:
#   line 1      - scheduler options (job name, queue, parallel environment, ...)
#   line 2      - TASSEL launcher with its debug log and Java heap size
#   lines 3-end - the plugin and its options, terminated by -endplugin -runfork1
qsub -N P1_SeqToTag -q long.q@r06d7 -pe SMP 3 -V -cwd -b y \
  /home/bin/Tassel/5.2.18/x64/run_pipeline.pl -debug ./P1_debug.txt -Xmx20G \
  -fork1 -GBSSeqToTagDBPlugin \
  -c 10 \
  -db ./GBSPVDunified.db \
  -deleteOldData true \
  -e ApeKI \
  -i ./AllFastqFiles/ \
  -k ./GBS_PVd_unified_keyfile.txt \
  -mnQS 20 \
  -kmerLength 126 \
  -minKmerL 20 \
  -mxKmerNum 300000000 \
  -batchSize 8 \
  -endplugin -runfork1

# STEP 2 - TagExportToFastqPlugin: export the tags from the database for alignment.
#
# Ordering (needed when this file is run top to bottom instead of pasted by hand):
#   -hold_jid P1_SeqToTag  keeps this job on hold until the STEP 1 job has
#                          finished, because the export reads the database
#                          that STEP 1 fills.
#   -sync y                makes qsub wait until this job itself has finished.
#                          STEP 3 runs bwa directly in this shell, so it must not
#                          start before ./TagsForAlignGBSPVdUni.fa.gz is complete.
qsub -N P2_TagToFastq -hold_jid P1_SeqToTag -sync y \
  -q long.q@r06d7 -pe SMP 3 -V -cwd -b y \
  /home/bin/Tassel/5.2.18/x64/run_pipeline.pl -debug ./P2_debug.txt -Xmx15G \
  -fork1 -TagExportToFastqPlugin \
  -c 10 \
  -db ./GBSPVDunified.db \
  -o ./TagsForAlignGBSPVdUni.fa.gz \
  -endplugin -runfork1

# STEP 3 - Run the alignment program: align the exported tags to the reference.
# DO NOT submit this step to the queue - doing so will produce a bad SAM file.
# NOTE(review): presumably the cause is that in `qsub ... > file` the redirection
# captures qsub's own output instead of bwa's stdout - confirm before changing.
/home/bin/bwa/0.7.12/x64/bwa mem -t 8 \
  /home/facon/Pvulgaris218v1_ref_genome/Pvulgaris_218_v1.0.renamed.fa \
  ./TagsForAlignGBSPVdUni.fa.gz \
  > GBSPVDunified.sam

# STEP 4 - SAMToGBSdbPlugin: import the alignment (the SAM file written in
# STEP 3) into the database.
# Scheduler options come first, then the TASSEL launcher, then the plugin options.
qsub -N P4_SAMtoDB -q long.q@r06d7 -pe SMP 3 -V -cwd -b y \
  /home/bin/Tassel/5.2.18/x64/run_pipeline.pl -debug ./P4_debug.txt -Xmx15G \
  -fork1 -SAMToGBSdbPlugin \
  -aLen 0 \
  -aProp 0 \
  -i ./GBSPVDunified.sam \
  -db ./GBSPVDunified.db \
  -deleteOldData true \
  -endplugin -runfork1

# STEP 5 - DiscoverySNPCallerPluginV2: call the SNPs from the aligned tags
# stored in the database.
#
# -hold_jid P4_SAMtoDB keeps this job on hold until the STEP 4 job has finished:
# both jobs use ./GBSPVDunified.db, and SNP discovery needs the alignment import
# to be complete. Without it, submitting the steps back to back lets the
# scheduler start them together.
qsub -N P5_SNPdisc -hold_jid P4_SAMtoDB \
  -q long.q@r06d7 -pe SMP 3 -V -cwd -b y \
  /home/bin/Tassel/5.2.18/x64/run_pipeline.pl -debug ./P5_debug.txt -Xmx15G \
  -fork1 -DiscoverySNPCallerPluginV2 \
  -db ./GBSPVDunified.db \
  -inclGaps false \
  -inclRare false \
  -mnLCov 0.1 \
  -mnMAF 0.01 \
  -sC 1 \
  -eC 11 \
  -deleteOldData true \
  -endplugin -runfork1

# STEPS 6, 7 & 8 (printing the SNP statistics file, updating the SNP position quality scores and printing the database) were skipped for now

# STEP 9a - ProductionSNPCallerPluginV2: PRODUCTION PIPELINE. Reads the FASTQ
# files and key file again and writes the genotypes to
# GBSPVdUnifiedResultsOpen.h5; that file is closed afterwards in STEP 9b.
#
# -hold_jid P5_SNPdisc keeps this job on hold until the STEP 5 job has finished,
# because it uses the same database (./GBSPVDunified.db) that SNP discovery
# writes to. Without it, submitting the steps back to back lets the scheduler
# start them together.
qsub -N P9a_SNPcall -hold_jid P5_SNPdisc \
  -q long.q@r06d7 -pe SMP 4 -V -cwd -b y \
  /home/bin/Tassel/5.2.18/x64/run_pipeline.pl -debug ./P9a_debug.txt -Xmx15G \
  -fork1 -ProductionSNPCallerPluginV2 \
  -batchSize 8 \
  -d 0 \
  -db ./GBSPVDunified.db \
  -e ApeKI \
  -eR 0.01 \
  -i ./AllFastqFiles/ \
  -k ./GBS_PVd_unified_keyfile.txt \
  -ko true \
  -kmerLength 126 \
  -minPosQS 0 \
  -mnQS 20 \
  -do true \
  -o GBSPVdUnifiedResultsOpen.h5 \
  -endplugin -runfork1

# STEP 9b - BuildUnfinishedHDF5GenotypesPlugin: CLOSING THE HDF5 FILE. Takes the
# open genotype file written by STEP 9a and writes the closed version.
#
# -hold_jid P9a_SNPcall keeps this job on hold until the STEP 9a job has
# finished, so the input file is complete before it is read.
qsub -N P9b_H5close -hold_jid P9a_SNPcall \
  -q long.q@r06d7 -pe SMP 3 -V -cwd -b y \
  /home/bin/Tassel/5.2.18/x64/run_pipeline.pl -debug ./P9b_debug.txt -Xmx10G \
  -fork1 -BuildUnfinishedHDF5GenotypesPlugin \
  -i ./GBSPVdUnifiedResultsOpen.h5 \
  -o ./GBSPVdUnifiedResultsClosed.h5 \
  -endplugin -runfork1