patch: Sqt2XML fixes

16 views

Skip to first unread message

Mike Coleman

unread,

Aug 7, 2009, 5:14:49 PM 8/7/09

to spctools-discuss

Here are some fixes for Sqt2XML. I can separate this into smaller
parts if needed, or provide it in another form.

Mike

commit af141a678622bed5074775b94b427dfa29f4126b
Author: Mike Coleman <mkc@delta.(none)>
Date: Fri Aug 7 16:03:24 2009 -0500

fix typo

commit c6b12aac98e802bd134d92ee17ce8c92504b8841
Author: Mike Coleman <mkc@delta.(none)>
Date: Thu Aug 6 13:08:49 2009 -0500

fix: define proton mass using double-precision constant, to avoid
losing precision

commit e74c7f634b1947be3b33e9a90b9f429f21be64d4
Author: Mike Coleman <mkc@delta.(none)>
Date: Thu Aug 6 12:29:40 2009 -0500

fix: trim whitespace from SEQUEST output to give correct peptide

also trim locus, defensively

commit fc6067ae1644f975501a587db27d904bcd7ee32f
Author: Mike Coleman <mkc@delta.(none)>
Date: Thu Aug 6 11:57:51 2009 -0500

fix: default DeltaCn is 0 (not 1)

commit bc0f748e4dc1e613dccd97c0bf4be095015f0438
Author: Mike Coleman <mkc@delta.(none)>
Date: Thu Aug 6 11:43:36 2009 -0500

fix: require only one argument

diff --git a/README.txt b/README.txt
index a4b9ed9..f0eb92c 100644
--- a/README.txt
+++ b/README.txt
@@ -248,7 +248,7 @@ You need to start with a converter to write out search results as 'summary.xml'
You can view the search results by opening the 'summary.xml' file in your browser.

Next, you can run xinteract to apply all or some parts of the pipeline. Type 'xinteract' with no arguments for usage instructions.
-You can also convert and run the pipeline in one step. See xinterct instructions for details.
+You can also convert and run the pipeline in one step. See xinteract instructions for details.

To run the pipeline manually, starting with file1.xml and file2.xml:
diff --git a/src/Parsers/Algorithm2XML/Sqt2XML/Sqt2XML.cxx b/src/Parsers/Algorithm2XML/Sqt2XML/Sqt2XML.cxx
index b569402..8dcd2e8 100644
--- a/src/Parsers/Algorithm2XML/Sqt2XML/Sqt2XML.cxx
+++ b/src/Parsers/Algorithm2XML/Sqt2XML/Sqt2XML.cxx
@@ -18,7 +18,15 @@
#endif
#endif

-static double fProtonMass = 1.007276f;
+static const double protonMass = 1.007276;
+
+
+string trim(string& s, const string& dropChars = " \t\f\r\n")
+{
+ string r = s.erase(s.find_last_not_of(dropChars) + 1);
+ return r.erase(0, r.find_first_not_of(dropChars));
+}
+

void writeTagArray( SimpleXMLWriter& writer, Array<Tag*>* tagArray )
{
@@ -61,7 +69,7 @@ int main(int argc, char** argv) {
string paramsFilepath = "sequest.params";
char* sample_enzyme = new char[128];

- int minArgs = 2;
+ int minArgs = 1;
int flagArgs = 0;

strcpy(sample_enzyme, "trypsin");
@@ -306,6 +314,8 @@ int main(int argc, char** argv) {
tokenStart = tokenEnd+1; tokenEnd = inputLine.find( '\t', tokenStart+1 );
//cout << "\"" << inputLine.substr( tokenStart, tokenEnd-tokenStart ) << "\"\n";
m.sequence = inputLine.substr( tokenStart, tokenEnd-tokenStart );
+ // SEQUEST output includes extra whitespace, so must trim here!
+ m.sequence = trim( m.sequence );
//m.sequence = m.sequence.substr( 2, m.sequence.length() - 4 ); // trim flanking residue notation
//m.sequence = ConvertSqtPtmToFreiPtm( r.sequence, &fileResidueMap );
//cout << m.sequence << endl;
@@ -319,7 +329,9 @@ int main(int argc, char** argv) {
tokenStart = 2; // skip L and \t
tokenEnd = inputLine.find_first_of( "\r\n\t ", tokenStart+1 );
//cout << "\"" << inputLine.substr( tokenStart, tokenEnd-tokenStart ) << "\"\n";
- m.loci.push_back( L_entry( inputLine.substr( tokenStart, tokenEnd-tokenStart ) ) );
+ string locus = inputLine.substr( tokenStart, tokenEnd-tokenStart );
+ locus = trim( locus );
+ m.loci.push_back( L_entry( locus ) );

getline( sqtFile, inputLine );
}
@@ -338,7 +350,7 @@ int main(int argc, char** argv) {
writer.attr( "spectrum", spectrumId.str() );
writer.attr( "start_scan", s.firstScan );
writer.attr( "end_scan", s.lastScan );
- float precursorNeutralMass = s.observedMassPlus1 - fProtonMass;
+ float precursorNeutralMass = s.observedMassPlus1 - protonMass;
writer.attr( "precursor_neutral_mass", precursorNeutralMass );
writer.attr( "assumed_charge", s.chargeState );
writer.attr( "index", index++ );
@@ -390,7 +402,7 @@ int main(int argc, char** argv) {
writer.attr( "num_tot_proteins", (int) m.loci.size() );
writer.attr( "num_matched_ions", m.matchedIons );
writer.attr( "tot_num_ions", m.predictedIons );
- float sequenceNeutralMass = m.calculatedMassPlus1 - fProtonMass;
+ float sequenceNeutralMass = m.calculatedMassPlus1 - protonMass;
writer.attr( "calc_neutral_pep_mass", sequenceNeutralMass );
float precursorToSequenceMassError = s.observedMassPlus1 - m.calculatedMassPlus1;
writer.attr( "massdiff", precursorToSequenceMassError );
@@ -415,7 +427,7 @@ int main(int argc, char** argv) {
writer.attr( "value", m.xcorr );
writer.close();

- float nextDeltaCn = 1.0f;
+ float nextDeltaCn = 0; // unless we find better, assume the worst
size_t nextRankIndex = matchIndex+1;
while( nextRankIndex < s.matches.size() )
{

Reply all

Reply to author

Forward

0 new messages