<Release> Simple C/C++ Perfometer: Splitting string into vector of vectors (Versions 1.x)

13 views
Skip to first unread message

Alex Vinokur

unread,
Apr 20, 2005, 9:31:05 AM4/20/05
to C++ Perfometer Google Group

###############
### Release ###
###############

=========================================
* Performance
* Comparative Performance Measurement
-----------------------------------------
* Tool : Simple C/C++ Perfometer
* Algorithm: Splitting string into vector of vectors
* Language : C++
* Version : S2VV-1.0
-----------------------------------------
* Environment: Windows 2000 Professional
Intel(R) Celeron(R) CPU 1.70 GHz
Cygwin, Mingw32 interface, Djgpp
* Compilers : GNU g++ 3.x, Microsoft C++ 13.00.9466 (Unmanaged)
=========================================



===================== 1. Testsuites : BEGIN =====================

Testsuites
----------
Unix-C-01 : Function strsep()
CPP-01 : std::getline, std::istream_iterator
CPP-02 : istream::getline, std::istream_iterator
CPP-03 : for-loop, string::find_first_of

===================== 1. Testsuites : END =======================




===================== 2. Program files : BEGIN =====================

Program file s2vv-1-0.cpp can be found at:


------ Via Google Groups ---
http://groups-beta.google.com/group/sources/msg/77993fb8841382c8
http://groups-beta.google.com/group/sources/attach/77993fb8841382c8/s2vv-1-0.cpp?part=2


------ Via Gmane Groups ---
http://article.gmane.org/gmane.comp.lang.c++.perfometer/116
http://permalink.gmane.org/gmane.comp.lang.c++.perfometer/116
http://cache.gmane.org/gmane/comp/lang/c++/perfometer/116

===================== 2. Program files : END =======================



===================== 3. Performance tests : BEGIN =====================

Raw log files can be found at:


------ Via Google Groups ---
* http://groups-beta.google.com/group/log-files/msg/305be21f949acdcc

-- Cygwin, GNU g++ 3.3.3 ---
*** http://groups-beta.google.com/group/log-files/attach/305be21f949acdcc/s2vv-cyg.log?part=2

-- Mingw32 interface, GNU g++ 3.3.3 ---
*** http://groups-beta.google.com/group/log-files/attach/305be21f949acdcc/s2vv-cyg.log?part=3

-- Djgpp, GNU gpp 3.4.1 ---
*** http://groups-beta.google.com/group/log-files/attach/305be21f949acdcc/s2vv-cyg.log?part=4

-- Microsoft C++ 13.00.9466 (Unmanaged) ---
*** http://groups-beta.google.com/group/log-files/attach/305be21f949acdcc/s2vv-cyg.log?part=4


------ Via Gmane Groups ---
* http://thread.gmane.org/gmane.comp.lang.c++.perfometer/117
* http://comments.gmane.org/gmane.comp.lang.c++.perfometer/117

-- Cygwin, GNU g++ 3.3.3 ---
*** http://cache.gmane.org/gmane/comp/lang/c++/perfometer/117-001.bin

-- Mingw32 interface, GNU g++ 3.3.3 ---
*** http://cache.gmane.org/gmane/comp/lang/c++/perfometer/117-002.bin

-- Djgpp, GNU gpp 3.4.1 ---
*** http://cache.gmane.org/gmane/comp/lang/c++/perfometer/117-003.bin

-- Microsoft C++ 13.00.9466 (Unmanaged) ---
*** http://cache.gmane.org/gmane/comp/lang/c++/perfometer/117-004.bin


===================== 3. Performance tests : END =======================




===================== 4. Methods of splitting : BEGIN =====================


// Line terminator as a single character (passed to istream::getline).
#define NEWLINE_CHAR '\n'

// Delimiter sets as C strings: every character in the string is a delimiter.
// WORD_DELIMS separates the words of a line, LINE_DELIMS separates lines.
#define WORD_DELIMS " \t"
#define LINE_DELIMS "\n"


// --------------------------------------
// Auxiliary function-1
//
// Splits str_i into tokens separated by any character of delims_i,
// running strsep() over a private, writable copy of the string.
//
//  * An empty input yields an empty vector.
//  * One trailing delimiter is stripped first, so it does not produce
//    a trailing empty token.
//  * Adjacent delimiters produce empty tokens.
//
// Note: strsep() is a BSD/glibc extension, not standard C or C++.
vector<string> split_str_via_strsep (
        const string& str_i,
        const char * const delims_i
        )
// ---------------------------------------
{
    vector<string> v;
    if (str_i.empty()) return v;

    const size_t size (str_i.size());

    // Writable, NUL-terminated copy of the input. The vector owns the
    // storage, so it is released on every exit path.
    vector<char> buffer (str_i.begin(), str_i.end());
    buffer.push_back ('\0');

    // Strip a single trailing delimiter.
    if (strchr (delims_i, buffer[size - 1])) buffer[size - 1] = 0;

    // strsep() advances the pointer it is given and finally sets it to NULL,
    // so it must work on a separate cursor, never on the owning pointer
    // (deleting the advanced/NULL pointer would leak the buffer).
    char* cursor = &buffer[0];
    char* token;
    while ((token = strsep (&cursor, delims_i))) v.push_back (token);
    return v;
}


// --------------------------------------
// Auxiliary function-2
//
// Splits str_i into tokens separated by any character of delims_i,
// scanning with string::find_first_of().
//
//  * An empty input yields an empty vector.
//  * Adjacent delimiters produce empty tokens.
//  * A delimiter that is the very last character does not produce
//    a trailing empty token.
vector<string> split_str_via_find_first (
        const string& str_i,
        const char * const delims_i
        )
// --------------------------------------
{
    vector<string> tokens;
    const string::size_type total = str_i.size();

    // 'start' is the index of the first character of the next token.
    for (string::size_type start = 0; start < total; )
    {
        const string::size_type hit  = str_i.find_first_of (delims_i, start);
        const string::size_type stop = (hit == string::npos) ? total : hit;

        tokens.push_back (string (str_i, start, stop - start));

        // Continue just behind the delimiter (or behind the end of input).
        start = stop + 1;
    }
    return tokens;
}



### Unix-C-01: Function strsep() ###
// --------------------------------------
// Splits str_i into lines (LINE_DELIMS) and every line into words
// (WORD_DELIMS), both through split_str_via_strsep().
// Returns one vector of words per line; empty tokens are removed.
vector<vector<string> > Unix_C_01__strsep (const string& str_i)
// --------------------------------------
{
    const vector<string> lines (split_str_via_strsep (str_i, LINE_DELIMS));

    vector<vector<string> > result;
    for (vector<string>::const_iterator it = lines.begin(); it != lines.end(); ++it)
    {
        vector<string> words = split_str_via_strsep (*it, WORD_DELIMS);

        // Drop the empty tokens (erase-remove).
        const vector<string>::iterator new_end =
                remove (words.begin(), words.end(), string());
        words.erase (new_end, words.end());

        result.push_back (words);
    }
    return result;
}


### CPP-01: std::getline, std::istream_iterator ###
// --------------------------------------
// Splits str_i into lines with the free function std::getline() and
// every line into whitespace-separated words with std::istream_iterator.
// Returns one vector of words per line; a line without words
// contributes an empty vector.
vector<vector<string> > CPP_01__function_getline__istream_iterator (const string& str_i)
// --------------------------------------
{
    typedef istream_iterator<string> word_iterator;

    vector<vector<string> > rows;
    istringstream text_stream (str_i.c_str());

    for (string current_line; getline (text_stream, current_line); )
    {
        istringstream line_stream (current_line.c_str ());

        // Append an empty row, then fill it straight from the stream.
        rows.push_back (vector<string> ());
        rows.back ().assign (word_iterator (line_stream), word_iterator ());
    }
    return rows;
}


### CPP-02: istream::getline, std::istream_iterator ###
// --------------------------------------
// Splits str_i into lines with the member function istream::getline()
// (reading into a character buffer) and every line into
// whitespace-separated words with std::istream_iterator.
// Returns one vector of words per line; a line without words
// contributes an empty vector.
vector<vector<string> > CPP_02__method_getline__istream_iterator (const string& str_i)
// --------------------------------------
{
    vector<vector<string> > vv;
    istringstream iss1 (str_i.c_str());

    // No line can be longer than the whole input, so size() + 1 characters
    // (text plus terminating NUL) are always sufficient.
    // The buffer is owned by a vector so that it is released even when
    // push_back() or a stream constructor throws.
    vector<char> buffer (str_i.size() + 1);

    while (iss1.getline (&buffer[0], buffer.size(), NEWLINE_CHAR))
    {
        istringstream iss2 (&buffer[0]);
        istream_iterator<string> b(iss2), e;
        vv.push_back(vector<string> (b, e));
    }
    return vv;
}


### CPP-03: for-loop, string::find_first_of ###
// --------------------------------------
// Splits str_i into lines (LINE_DELIMS) and every line into words
// (WORD_DELIMS), both through split_str_via_find_first().
// Returns one vector of words per line; empty tokens are removed.
vector<vector<string> > CPP_03__loop__find_first (const string& str_i)
// --------------------------------------
{
    const vector<string> lines (split_str_via_find_first (str_i, LINE_DELIMS));

    vector<vector<string> > result;
    for (vector<string>::const_iterator it = lines.begin(); it != lines.end(); ++it)
    {
        vector<string> words = split_str_via_find_first (*it, WORD_DELIMS);

        // Drop the empty tokens (erase-remove).
        const vector<string>::iterator new_end =
                remove (words.begin(), words.end(), string());
        words.erase (new_end, words.end());

        result.push_back (words);
    }
    return result;
}


===================== 4. Methods of splitting : END =======================



==================== 5. Summary results : BEGIN =====================


---------------------------
5.1. GNU g++ 3.3.3 (CYGWIN)
---------------------------

### String size : 1000
### Number of runs : 1
### Number of tests : 5
### Number of repetitions : 25
### CLOCKS_PER_SEC : 1000


=== String-A: 1000 lines, 0 fields
=== String-B: 1 line, 0 fields
=== String-C: 33 lines, 213 fields
=== String-D: 9 lines, 32 fields



--- String-A ---
Unix_C_01__strsep : 2083 units (2.083 secs)
CPP_01__function_getline__istream_iterator : 4737 units (4.737 secs)
CPP_02__method_getline__istream_iterator : 4443 units (4.443 secs)
CPP_03__loop__find_first : 2243 units (2.243 secs)

--- String-B ---
Unix_C_01__strsep : 614 units (0.614 secs)
CPP_01__function_getline__istream_iterator : 263 units (0.263 secs)
CPP_02__method_getline__istream_iterator : 16 units (0.016 secs)
CPP_03__loop__find_first : 764 units (0.764 secs)

--- String-C ---
Unix_C_01__strsep : 784 units (0.784 secs)
CPP_01__function_getline__istream_iterator : 1832 units (1.832 secs)
CPP_02__method_getline__istream_iterator : 1652 units (1.652 secs)
CPP_03__loop__find_first : 821 units (0.821 secs)

--- String-D ---
Unix_C_01__strsep : 137 units (0.137 secs)
CPP_01__function_getline__istream_iterator : 1315 units (1.315 secs)
CPP_02__method_getline__istream_iterator : 1071 units (1.071 secs)
CPP_03__loop__find_first : 150 units (0.150 secs)



--------------------------
5.2. GNU g++ 3.3.3 (MINGW)
--------------------------

### String size : 1000
### Number of runs : 1
### Number of tests : 5
### Number of repetitions : 25
### CLOCKS_PER_SEC : 1000


=== String-A: 1000 lines, 0 fields
=== String-B: 1 line, 0 fields
=== String-C: 33 lines, 213 fields
=== String-D: 9 lines, 24 fields



--- String-A ---
CPP_01__function_getline__istream_iterator : 471 units (0.471 secs)
CPP_02__method_getline__istream_iterator : 427 units (0.427 secs)
CPP_03__loop__find_first : 234 units (0.234 secs)

--- String-B ---
CPP_01__function_getline__istream_iterator : 20 units (0.020 secs)
CPP_02__method_getline__istream_iterator : 6 units (0.006 secs)
CPP_03__loop__find_first : 73 units (0.073 secs)

--- String-C ---
CPP_01__function_getline__istream_iterator : 127 units (0.127 secs)
CPP_02__method_getline__istream_iterator : 103 units (0.103 secs)
CPP_03__loop__find_first : 63 units (0.063 secs)

--- String-D ---
CPP_01__function_getline__istream_iterator : 63 units (0.063 secs)
CPP_02__method_getline__istream_iterator : 43 units (0.043 secs)
CPP_03__loop__find_first : 6 units (0.006 secs)




------------------------------
5.3. GNU gpp 3.4.1 (DJGPP 2.3)
------------------------------


### String size : 1000
### Number of runs : 1
### Number of tests : 5
### Number of repetitions : 300
### CLOCKS_PER_SEC : 91


=== String-A: 1000 lines, 0 fields
=== String-B: 1 line, 0 fields
=== String-C: 33 lines, 213 fields
=== String-D: 8 lines, 28 fields



--- String-A ---
Unix_C_01__strsep : 183 units (2.011 secs)
CPP_01__function_getline__istream_iterator : 250 units (2.747 secs)
CPP_02__method_getline__istream_iterator : 241 units (2.648 secs)
CPP_03__loop__find_first : 178 units (1.956 secs)

--- String-B ---
Unix_C_01__strsep : 33 units (0.363 secs)
CPP_01__function_getline__istream_iterator : 5 units (0.055 secs)
CPP_02__method_getline__istream_iterator : 0 units (0.000 secs)
CPP_03__loop__find_first : 31 units (0.341 secs)

--- String-C ---
Unix_C_01__strsep : 68 units (0.747 secs)
CPP_01__function_getline__istream_iterator : 78 units (0.857 secs)
CPP_02__method_getline__istream_iterator : 76 units (0.835 secs)
CPP_03__loop__find_first : 73 units (0.802 secs)

--- String-D ---
Unix_C_01__strsep : 10 units (0.110 secs)
CPP_01__function_getline__istream_iterator : 10 units (0.110 secs)
CPP_02__method_getline__istream_iterator : 13 units (0.143 secs)
CPP_03__loop__find_first : 11 units (0.121 secs)





-----------------------------------------
5.4. Microsoft C++ 13.00.9466 (Unmanaged)
-----------------------------------------


YOUR COMMAND LINE : a-ms.exe 1000 5 25

### String size : 1000
### Number of runs : 1
### Number of tests : 5
### Number of repetitions : 25
### CLOCKS_PER_SEC : 1000


=== String-A: 1000 lines, 0 fields
=== String-B: 1 line, 0 fields
=== String-C: 33 lines, 213 fields
=== String-D: 6 lines, 18 fields



--- String-A ---
CPP_01__function_getline__istream_iterator : 437 units (0.437 secs)
CPP_02__method_getline__istream_iterator : 443 units (0.443 secs)
CPP_03__loop__find_first : 250 units (0.250 secs)

--- String-B ---
CPP_01__function_getline__istream_iterator : 36 units (0.036 secs)
CPP_02__method_getline__istream_iterator : 20 units (0.020 secs)
CPP_03__loop__find_first : 147 units (0.147 secs)

--- String-C ---
CPP_01__function_getline__istream_iterator : 186 units (0.186 secs)
CPP_02__method_getline__istream_iterator : 150 units (0.150 secs)
CPP_03__loop__find_first : 127 units (0.127 secs)

--- String-D ---
CPP_01__function_getline__istream_iterator : 60 units (0.060 secs)
CPP_02__method_getline__istream_iterator : 50 units (0.050 secs)
CPP_03__loop__find_first : 26 units (0.026 secs)



==================== 5. Summary results : END =======================


--
Alex Vinokur
email: alex DOT vinokur AT gmail DOT com
http://mathforum.org/library/view/10978.html
http://sourceforge.net/users/alexvn






Reply all
Reply to author
Forward
0 new messages