On 28.03.2019 11:01, Juha Nieminen wrote:
> Paavo Helde <myfir...@osa.pri.ee> wrote:
>> If file reading is a performance bottleneck then one should use mmap
>> instead.
>
> In which version of the C++ standard was mmap introduced?
That's what I said. You care about other things more than about
performance. Those other things appear to be standard conformance and
convenience (reading the whole file in one go).
There is nothing wrong with these preferences; that's a perfectly fine
approach. But then it sounds a bit silly to complain about the time
wasted on std::vector initialization.
I just made a little performance test, reading a 2.3 GB file and summing
all its bytes. The results are here:
large vector: 1.55176 s
large new[] : 1.40286 s, 9.59564 % win
small vector: 0.768879 s, 50.4511 % win
small new[] : 0.759881 s, 51.031 % win
mmap : 0.46249 s, 70.1958 % win
Here, large means the whole file read into a single buffer, and small
means a 16k buffer.
IIRC your approach was "large vector" (read the whole file into a
std::vector). So, using an uninitialized buffer with new[] would win ca
10% in this task (that's much more than I expected, must be because the
file is already in OS caches). That's the overhead you complained about.
However, by using a smaller buffer and thus reducing stress on memory
allocator you can win 50% instead, fully standard-conformant.
And finally, if you care about performance more than having pure
standard-conforming code, then you can use memory mapping and win about 70%.
Code follows (Windows-only, no error checks, sorry):
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <memory>
#include <numeric>
#include <stdexcept>
#include <string>
#include <vector>
#include <io.h>
#include <Windows.h>
int main() {
std::string filename = "D:/test/columbus/Case 00647038.zip";
unsigned int x1, x2, x3, x4, x5;
// put mmap first to warm caches up and still win
auto start3 = std::chrono::steady_clock::now();
{
HANDLE h = ::CreateFileA(filename.c_str(), GENERIC_READ,
FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL, NULL);
LARGE_INTEGER li;
::GetFileSizeEx(h, &li);
size_t n = li.QuadPart;
HANDLE m = ::CreateFileMapping(h, NULL, PAGE_READONLY, 0, 0, NULL);
unsigned char* view = static_cast<unsigned char*>(::MapViewOfFile(m,
FILE_MAP_READ, 0, 0, n));
x3 = std::accumulate(view, view+n, 0u);
::UnmapViewOfFile(view);
::CloseHandle(m);
::CloseHandle(h);
}
auto finish3 = std::chrono::steady_clock::now();
auto start1 = std::chrono::steady_clock::now();
{
FILE* f = fopen(filename.c_str(), "rb");
size_t n = _filelengthi64(fileno(f));
std::vector<unsigned char> a(n);
fread(a.data(), 1, n, f);
fclose(f);
x1 = std::accumulate(a.begin(), a.end(), 0u);
}
auto finish1 = std::chrono::steady_clock::now();
auto start2 = std::chrono::steady_clock::now();
{
FILE* f = fopen(filename.c_str(), "rb");
size_t n = _filelengthi64(fileno(f));
unsigned char* b = new unsigned char[n];
fread(b, 1, n, f);
x2 = std::accumulate(b, b+n, 0u);
delete[] b;
fclose(f);
}
auto finish2 = std::chrono::steady_clock::now();
auto start4 = std::chrono::steady_clock::now();
{
FILE* f = fopen(filename.c_str(), "rb");
size_t n = _filelengthi64(fileno(f));
const size_t bufferSize = 4*4096;
std::vector<unsigned char> a(bufferSize);
x4 = 0;
while (true) {
size_t k = fread(a.data(), 1, bufferSize, f);
x4 = std::accumulate(a.data(), a.data()+k, x4);
if (k<bufferSize) {
break;
}
}
fclose(f);
}
auto finish4 = std::chrono::steady_clock::now();
auto start5 = std::chrono::steady_clock::now();
{
FILE* f = fopen(filename.c_str(), "rb");
size_t n = _filelengthi64(fileno(f));
const size_t bufferSize = 4*4096;
unsigned char* a = new unsigned char[bufferSize];
x5 = 0;
while (true) {
size_t k = fread(a, 1, bufferSize, f);
x5 = std::accumulate(a, a+k, x5);
if (k<bufferSize) {
break;
}
}
delete[] a;
fclose(f);
}
auto finish5 = std::chrono::steady_clock::now();
auto dur1 =
std::chrono::duration_cast<std::chrono::duration<double>>(finish1-start1);
auto dur2 =
std::chrono::duration_cast<std::chrono::duration<double>>(finish2-start2);
auto dur3 =
std::chrono::duration_cast<std::chrono::duration<double>>(finish3-start3);
auto dur4 =
std::chrono::duration_cast<std::chrono::duration<double>>(finish4-start4);
auto dur5 =
std::chrono::duration_cast<std::chrono::duration<double>>(finish5-start5);
std::cout << "mmap : " << dur3.count() << " s, " <<
100.0*(dur1.count()-dur3.count())/dur1.count() << " % win\n";
std::cout << "large vector: " << dur1.count() << " s\n";
std::cout << "large new[] : " << dur2.count() << " s, " <<
100.0*(dur1.count()-dur2.count())/dur1.count() << " % win\n";
std::cout << "small vector: " << dur4.count() << " s, " <<
100.0*(dur1.count()-dur4.count())/dur1.count() << " % win\n";
std::cout << "small new[] : " << dur5.count() << " s, " <<
100.0*(dur1.count()-dur5.count())/dur1.count() << " % win\n";
if (x1!=x2 || x1!=x3 || x1!=x4 || x1!=x5) {
std::cerr << "Something wrong\n";
}
return x1-x2;
}