After running the PASA annotation-update step, the updated GFF3 file contains mRNA features with duplicate coordinates. What is the reason for this, and is it normal?
The following is my code:
#! /bin/sh
# --- Run configuration -------------------------------------------------------
# Sample identifier; used both as a directory name and as a file-name prefix.
sam_id="soy_sample2"
# Project root under which the per-sample directories and the genome live.
root_path="/home/user/zhangzhishuai/data/5_t2t"
# Directory that holds the pasapipeline Singularity image.
soft_path="/home/user/zhangzhishuai/data/genk/genk_anno/GenomeAnn_genek/container"
# Directory of the EVM result (${sam_id}.EVM.gff3 is read from here).
evmgff_path="${root_path}/${sam_id}/6EVMcombine"
# Directory of the earlier PASA run (config, sqlite database, transcripts).
pasareseq_path="${root_path}/${sam_id}/5PASA"
# Directory of the genome assembly (${sam_id}.scaffold.fasta is read from here).
genome_path="${root_path}/genome"
# Check that the EVM GFF3 is acceptable to PASA before loading it.
# The image path and script path are written exactly like the other
# singularity calls in this script (lower-case command, no embedded spaces).
singularity exec "${soft_path}/pasapipeline-v2.5.3.sif" \
  /usr/local/src/PASApipeline/misc_utilities/pasa_gff3_validator.pl \
  "${evmgff_path}/${sam_id}.EVM.gff3"
# Load the EVM GFF3 into a copy of the PASA database.
# Work on copies so the files of the original PASA run stay untouched.
cp "${pasareseq_path}/alignAssembly.config" ./
# Note the database name: Bra-pasa.sqlite is copied to Bra-pasa.new.sqlite.
# Source and destination must be two separate arguments to cp.
cp "${pasareseq_path}/Bra-pasa.sqlite" ./Bra-pasa.new.sqlite
# Point the copied config at the copied database.
# NOTE(review): the replacement path assumes this script is run from
# ${root_path}/${sam_id}/7updatagff -- confirm, or adjust the path.
sed -i "s:${root_path}/${sam_id}/5PASA/Bra-pasa.sqlite:${root_path}/${sam_id}/7updatagff/Bra-pasa.new.sqlite:" alignAssembly.config
singularity exec "${soft_path}/pasapipeline-v2.5.3.sif" \
  /usr/local/src/PASApipeline/scripts/Load_Current_Gene_Annotations.dbi \
  -c alignAssembly.config \
  -g "${genome_path}/${sam_id}.scaffold.fasta" \
  -P "${evmgff_path}/${sam_id}.EVM.gff3"
# Copy the annotation-comparison config template out of the container and
# fill in the database path.
singularity exec "${soft_path}/pasapipeline-v2.5.3.sif" \
  cp /usr/local/src/PASApipeline/pasa_conf/pasa.annotationCompare.Template.txt ./annotCompare.config
# The placeholder can also be replaced by hand (e.g. with vim) if sed fails.
sed -i "s:<__DATABASE__>:${root_path}/${sam_id}/7updatagff/Bra-pasa.new.sqlite:" ./annotCompare.config
# Launch PASA with the annotation-comparison config (-A) against the genome
# and the transcripts of the earlier PASA run.
singularity exec "${soft_path}/pasapipeline-v2.5.3.sif" \
  /usr/local/src/PASApipeline/Launch_PASA_pipeline.pl \
  --CPU 10 \
  -c annotCompare.config -A \
  -g "${genome_path}/${sam_id}.scaffold.fasta" \
  -t "${pasareseq_path}/transcript.fasta"
# Extract the updated gene GFF3, then the protein and CDS sequences.
# Keep only records: drop lines whose first field starts with '#' and lines
# with fewer than two fields. The awk operator is '!~' (no space inside).
# NOTE(review): the glob matches every post-update GFF3 in the current
# directory; if more than one file is present, their records are
# concatenated into the output. Check that only one file matches -- this
# may be related to duplicated mRNA coordinates in the result.
awk '$1 !~ /^#/ && NF>1' Bra-pasa.new.sqlite.gene_structures_post_PASA_updates.*.gff3 > "Bra.${sam_id}.EVM.update.gff3"
# The script path must be a single word on the singularity command line.
singularity exec "${soft_path}/pasapipeline-v2.5.3.sif" \
  /usr/local/src/PASApipeline/misc_utilities/gff3_file_to_proteins.pl \
  "Bra.${sam_id}.EVM.update.gff3" "${genome_path}/${sam_id}.scaffold.fasta" prot \
  > "Bra.${sam_id}.EVM.update.pep.fasta"
singularity exec "${soft_path}/pasapipeline-v2.5.3.sif" \
  /usr/local/src/PASApipeline/misc_utilities/gff3_file_to_proteins.pl \
  "Bra.${sam_id}.EVM.update.gff3" "${genome_path}/${sam_id}.scaffold.fasta" CDS \
  > "Bra.${sam_id}.EVM.update.cds.fasta"