Mike Coleman
unread,Aug 7, 2009, 5:14:49 PM8/7/09Sign in to reply to author
Sign in to forward
You do not have permission to delete messages in this group
Either email addresses are anonymous for this group or you need the view member email addresses permission to view the original message
to spctools-discuss
Here are some fixes for Sqt2XML. I can separate this into smaller
parts if needed, or provide it in another form.
Mike
commit af141a678622bed5074775b94b427dfa29f4126b
Author: Mike Coleman <mkc@delta.(none)>
Date: Fri Aug 7 16:03:24 2009 -0500
fix typo
commit c6b12aac98e802bd134d92ee17ce8c92504b8841
Author: Mike Coleman <mkc@delta.(none)>
Date: Thu Aug 6 13:08:49 2009 -0500
fix: define proton mass using double-precision constant, to avoid
losing precision
commit e74c7f634b1947be3b33e9a90b9f429f21be64d4
Author: Mike Coleman <mkc@delta.(none)>
Date: Thu Aug 6 12:29:40 2009 -0500
fix: trim whitespace from SEQUEST output to give correct peptide
also trim locus, defensively
commit fc6067ae1644f975501a587db27d904bcd7ee32f
Author: Mike Coleman <mkc@delta.(none)>
Date: Thu Aug 6 11:57:51 2009 -0500
fix: default DeltaCn is 0 (not 1)
commit bc0f748e4dc1e613dccd97c0bf4be095015f0438
Author: Mike Coleman <mkc@delta.(none)>
Date: Thu Aug 6 11:43:36 2009 -0500
fix: require only one argument
diff --git a/README.txt b/README.txt
index a4b9ed9..f0eb92c 100644
--- a/README.txt
+++ b/README.txt
@@ -248,7 +248,7 @@ You need to start with a converter to write out search results as 'summary.xml'
You can view the search results by opening the 'summary.xml' file in your browser.
Next, you can run xinteract to apply all or some parts of the pipeline. Type 'xinteract' with no arguments for usage instructions.
-You can also convert and run the pipeline in one step. See xinterct instructions for details.
+You can also convert and run the pipeline in one step. See xinteract instructions for details.
To run the pipeline manually, starting with file1.xml and file2.xml:
diff --git a/src/Parsers/Algorithm2XML/Sqt2XML/Sqt2XML.cxx b/src/Parsers/Algorithm2XML/Sqt2XML/Sqt2XML.cxx
index b569402..8dcd2e8 100644
--- a/src/Parsers/Algorithm2XML/Sqt2XML/Sqt2XML.cxx
+++ b/src/Parsers/Algorithm2XML/Sqt2XML/Sqt2XML.cxx
@@ -18,7 +18,15 @@
#endif
#endif
-static double fProtonMass = 1.007276f;
+static const double protonMass = 1.007276;
+
+
+string trim(string& s, const string& dropChars = " \t\f\r\n")
+{
+ string r = s.erase(s.find_last_not_of(dropChars) + 1);
+ return r.erase(0, r.find_first_not_of(dropChars));
+}
+
void writeTagArray( SimpleXMLWriter& writer, Array<Tag*>* tagArray )
{
@@ -61,7 +69,7 @@ int main(int argc, char** argv) {
string paramsFilepath = "sequest.params";
char* sample_enzyme = new char[128];
- int minArgs = 2;
+ int minArgs = 1;
int flagArgs = 0;
strcpy(sample_enzyme, "trypsin");
@@ -306,6 +314,8 @@ int main(int argc, char** argv) {
tokenStart = tokenEnd+1; tokenEnd = inputLine.find( '\t', tokenStart+1 );
//cout << "\"" << inputLine.substr( tokenStart, tokenEnd-tokenStart ) << "\"\n";
m.sequence = inputLine.substr( tokenStart, tokenEnd-tokenStart );
+ // SEQUEST output includes extra whitespace, so must trim here!
+ m.sequence = trim( m.sequence );
//m.sequence = m.sequence.substr( 2, m.sequence.length() - 4 ); // trim flanking residue notation
//m.sequence = ConvertSqtPtmToFreiPtm( r.sequence, &fileResidueMap );
//cout << m.sequence << endl;
@@ -319,7 +329,9 @@ int main(int argc, char** argv) {
tokenStart = 2; // skip L and \t
tokenEnd = inputLine.find_first_of( "\r\n\t ", tokenStart+1 );
//cout << "\"" << inputLine.substr( tokenStart, tokenEnd-tokenStart ) << "\"\n";
- m.loci.push_back( L_entry( inputLine.substr( tokenStart, tokenEnd-tokenStart ) ) );
+ string locus = inputLine.substr( tokenStart, tokenEnd-tokenStart );
+ locus = trim( locus );
+ m.loci.push_back( L_entry( locus ) );
getline( sqtFile, inputLine );
}
@@ -338,7 +350,7 @@ int main(int argc, char** argv) {
writer.attr( "spectrum", spectrumId.str() );
writer.attr( "start_scan", s.firstScan );
writer.attr( "end_scan", s.lastScan );
- float precursorNeutralMass = s.observedMassPlus1 - fProtonMass;
+ float precursorNeutralMass = s.observedMassPlus1 - protonMass;
writer.attr( "precursor_neutral_mass", precursorNeutralMass );
writer.attr( "assumed_charge", s.chargeState );
writer.attr( "index", index++ );
@@ -390,7 +402,7 @@ int main(int argc, char** argv) {
writer.attr( "num_tot_proteins", (int) m.loci.size() );
writer.attr( "num_matched_ions", m.matchedIons );
writer.attr( "tot_num_ions", m.predictedIons );
- float sequenceNeutralMass = m.calculatedMassPlus1 - fProtonMass;
+ float sequenceNeutralMass = m.calculatedMassPlus1 - protonMass;
writer.attr( "calc_neutral_pep_mass", sequenceNeutralMass );
float precursorToSequenceMassError = s.observedMassPlus1 - m.calculatedMassPlus1;
writer.attr( "massdiff", precursorToSequenceMassError );
@@ -415,7 +427,7 @@ int main(int argc, char** argv) {
writer.attr( "value", m.xcorr );
writer.close();
- float nextDeltaCn = 1.0f;
+ float nextDeltaCn = 0; // unless we find better, assume the worst
size_t nextRankIndex = matchIndex+1;
while( nextRankIndex < s.matches.size() )
{