From: Avi Kivity <a...@scylladb.com>
Committer: Avi Kivity <a...@scylladb.com>
Merge "separate-dma-alignment"
DMA read and write alignments are different for optimal performance.
---
diff --git a/core/file.hh b/core/file.hh
--- a/core/file.hh
+++ b/core/file.hh
@@ -75,7 +75,8 @@ struct directory_entry {
class file_impl {
public:
unsigned _memory_dma_alignment = 4096;
- unsigned _disk_dma_alignment = 4096;
+ unsigned _disk_read_dma_alignment = 4096;
+ unsigned _disk_write_dma_alignment = 4096;
public:
virtual ~file_impl() {}
@@ -196,9 +197,14 @@ public:
// we will end up with various pages around, some of them with
// overlapping ranges. Those would be very challenging to cache.
- /// Alignment requirement for file offsets
- uint64_t disk_dma_alignment() const {
- return _file_impl->_disk_dma_alignment;
+ /// Alignment requirement for file offsets (for reads)
+ uint64_t disk_read_dma_alignment() const {
+ return _file_impl->_disk_read_dma_alignment;
+ }
+
+ /// Alignment requirement for file offsets (for writes)
+ uint64_t disk_write_dma_alignment() const {
+ return _file_impl->_disk_write_dma_alignment;
}
/// Alignment requirement for data buffers
@@ -497,14 +503,14 @@ future<temporary_buffer<CharType>>
file::dma_read_bulk(uint64_t offset, size_t range_size) {
using tmp_buf_type = typename read_state<CharType>::tmp_buf_type;
- auto front = offset & (disk_dma_alignment() - 1);
+ auto front = offset & (disk_read_dma_alignment() - 1);
offset -= front;
range_size += front;
auto rstate = make_lw_shared<read_state<CharType>>(offset, front,
range_size,
memory_dma_alignment(),
- disk_dma_alignment());
+ disk_read_dma_alignment());
//
// First, try to read directly into the buffer. Most of the reads will
@@ -560,7 +566,7 @@ file::read_maybe_eof(uint64_t pos, size_t len) {
// an EINVAL error due to unaligned destination buffer.
//
temporary_buffer<CharType> buf = temporary_buffer<CharType>::aligned(
- memory_dma_alignment(), align_up(len, disk_dma_alignment()));
+ memory_dma_alignment(), align_up(len, disk_read_dma_alignment()));
// try to read a single bulk from the given position
return dma_read(pos, buf.get_write(), buf.size()).then_wrapped(
diff --git a/core/fstream.cc b/core/fstream.cc
--- a/core/fstream.cc
+++ b/core/fstream.cc
@@ -65,7 +65,7 @@ class file_data_source_impl : public data_source_impl {
while (_read_buffers.size() < ra) {
++_reads_in_progress;
// if _pos is not dma-aligned, we'll get a short read. Account for that.
- auto now = _options.buffer_size - _pos % _file.disk_dma_alignment();
+ auto now = _options.buffer_size - _pos % _file.disk_read_dma_alignment();
_read_buffers.push_back(_file.dma_read_bulk<char>(_pos, now).then_wrapped(
[this] (future<temporary_buffer<char>> ret) {
issue_read_aheads();
@@ -167,15 +167,15 @@ class file_data_sink_impl : public data_sink_impl {
// put() must usually be of chunks multiple of file::dma_alignment.
// Only the last part can have an unaligned length. If put() was
// called again with an unaligned pos, we have a bug in the caller.
- assert(!(pos & (_file.disk_dma_alignment() - 1)));
+ assert(!(pos & (_file.disk_write_dma_alignment() - 1)));
bool truncate = false;
auto p = static_cast<const char*>(buf.get());
size_t buf_size = buf.size();
- if ((buf.size() & (_file.disk_dma_alignment() - 1)) != 0) {
+ if ((buf.size() & (_file.disk_write_dma_alignment() - 1)) != 0) {
// If buf size isn't aligned, copy its content into a new aligned buf.
// This should only happen when the user calls output_stream::flush().
- auto tmp = allocate_buffer(align_up(buf.size(), _file.disk_dma_alignment()));
+ auto tmp = allocate_buffer(align_up(buf.size(), _file.disk_write_dma_alignment()));
::memcpy(tmp.get_write(), buf.get(), buf.size());
buf = std::move(tmp);
p = buf.get();
diff --git a/core/reactor.cc b/core/reactor.cc
--- a/core/reactor.cc
+++ b/core/reactor.cc
@@ -554,7 +554,10 @@ posix_file_impl::query_dma_alignment() {
auto r = ioctl(_fd, XFS_IOC_DIOINFO, &da);
if (r == 0) {
_memory_dma_alignment = da.d_mem;
- _disk_dma_alignment = da.d_miniosz;
+ _disk_read_dma_alignment = da.d_miniosz;
+ // xfs wants at least the block size for writes
+ // FIXME: really read the block size
+ _disk_write_dma_alignment = std::max<unsigned>(da.d_miniosz, 4096);
}
}