Hello!
I have ran the MSstats PTM algorithm in my TMT Phospho data, but unfortunately after doing a PCA analysis of the output, it the batch effect persists (attached PDF). I wonder if I did something wrong while applying it. Could you help me with this issue?
This is my code:
library(MSstatsPTM)
library(readxl)
maxq_tmt_evidence <- read.table("data/evidence.txt", sep = "\t", header = TRUE)
maxq_tmt_annotation <- read_excel("data/annotation_file.xlsx")
head(maxq_tmt_evidence)
head(maxq_tmt_annotation)
msstats_format_tmt = MaxQtoMSstatsPTMFormat(evidence=maxq_tmt_evidence,
annotation=maxq_tmt_annotation,
fasta=('data/Musmusculus_uniprotkb_AND_reviewed_true_AND_model_o_2024_06_25.fasta'),
fasta_protein_name="uniprot_ac",
mod_id="\\(Phospho \\(STY\\)\\)",
use_unmod_peptides=TRUE,
labeling_type = "TMT",
which_proteinid_ptm = "Proteins")
head(msstats_format_tmt$PROTEIN)
write.csv(msstats_format_tmt$PROTEIN,
file = "outputs/msstats_format.csv",
row.names = FALSE)
write.csv(msstats_format_tmt$PROTEIN,
file = "outputs/ProteinLevelData_BeforeMSSTats.csv",
row.names = FALSE)
dataSummarizationPTM(
data,
logTrans = 2,
normalization = "equalizeMedians",
normalization.PTM = "equalizeMedians",
nameStandards = NULL,
nameStandards.PTM = NULL,
featureSubset = "all",
featureSubset.PTM = "all",
remove_uninformative_feature_outlier = FALSE,
remove_uninformative_feature_outlier.PTM = FALSE,
min_feature_count = 2,
min_feature_count.PTM = 1,
n_top_feature = 3,
n_top_feature.PTM = 3,
summaryMethod = "TMP",
equalFeatureVar = TRUE,
censoredInt = "NA",
MBimpute = TRUE,
MBimpute.PTM = TRUE,
remove50missing = FALSE,
fix_missing = NULL,
maxQuantileforCensored = 0.999,
use_log_file = TRUE,
append = TRUE,
verbose = TRUE,
log_file_path = NULL,
base = "MSstatsPTM_log_"
)
# View the first few rows of the summarized data
head(summary_data_tmt, n = 20)
write.csv(summary_data_tmt[["PTM"]][["ProteinLevelData"]],
file = "outputs/MsStatsAdjustedProteinLevelData_Global_LastTest.csv",
row.names = FALSE)
# Extract protein-level data from summarized results
protein_data <- summary_data_tmt[["PTM"]][["ProteinLevelData"]]
Also I am displaying the annotation and s subset of the evidence file.
Thank you for the attention!
Montoni.