The existing assert that was introduced in
528762adbaf2c6dde857957b920606378959e8fb
is too harsh. It will fail on the first retry
failure after `need_preempt` returned true.
This change uses the lowres_clock to allow flush to make progress
within 1 second of the first `io_submit` failure.
If the failure happens only once, flush will complete
submitting all iocbs. If another failure happens,
we check whether at least one iocb has been submitted
and, if so, return a "short flush" after moving the remaining
iocbs to the front of the queue, to be submitted the next time flush is called.
If no progress has been made within 1 second
of the first failure, an assertion fails in order to prevent
an infinite loop.
Fixes #975
Test: unit(dev)
Signed-off-by: Benny Halevy <bha...@scylladb.com>
---
src/core/reactor_backend.cc | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/src/core/reactor_backend.cc b/src/core/reactor_backend.cc
index 25b13090..007078bf 100644
--- a/src/core/reactor_backend.cc
+++ b/src/core/reactor_backend.cc
@@ -26,6 +26,9 @@
#include <seastar/core/internal/buffer_allocator.hh>
#include <seastar/util/defer.hh>
#include <seastar/util/read_first_line.hh>
+#include <seastar/core/lowres_clock.hh>
+
+#include <optional>
#include <chrono>
#include <sys/poll.h>
#include <sys/syscall.h>
@@ -272,7 +275,8 @@ void aio_general_context::queue(linux_abi::iocb* iocb) {
size_t aio_general_context::flush() {
auto begin = iocbs.get();
- auto retried = last;
+ constexpr lowres_clock::time_point no_time_point = lowres_clock::time_point(lowres_clock::duration(0));
+ auto retry_until = no_time_point;
while (begin != last) {
auto r = io_submit(io_context, last - begin, begin);
if (__builtin_expect(r > 0, true)) {
@@ -280,11 +284,19 @@ size_t aio_general_context::flush() {
continue;
}
// errno == EAGAIN is expected here. We don't explicitly assert that
- // since the assert below requires that some progress will be
+ // since the logic below requires that some progress will be
// made, preventing an endless loop for any reason.
- if (need_preempt()) {
- assert(retried != begin);
- retried = begin;
+ if (retry_until == no_time_point) {
+ // allow retrying for 1 second
+ retry_until = lowres_clock::now() + 1s;
+ } else if (begin != iocbs.get()) {
+ // partial flush, return the number of successful submissions
+ std::copy(begin, last, iocbs.get());
+ auto nr = begin - iocbs.get();
+ last -= nr;
+ return nr;
+ } else {
+ assert(lowres_clock::now() < retry_until);
}
}
auto nr = last - iocbs.get();
--
2.31.1