summary refs log tree commit diff stats
path: root/io_uring/rw.c
diff options
context:
space:
mode:
Diffstat (limited to 'io_uring/rw.c')
-rw-r--r-- io_uring/rw.c 176
1 files changed, 134 insertions, 42 deletions
diff --git a/io_uring/rw.c b/io_uring/rw.c
index f023ff49c688..0bcb83e4ce3c 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -31,9 +31,19 @@ struct io_rw {
rwf_t flags;
};
-static inline bool io_file_supports_nowait(struct io_kiocb *req)
+static bool io_file_supports_nowait(struct io_kiocb *req, __poll_t mask)
{
- return req->flags & REQ_F_SUPPORT_NOWAIT;
+ /* If FMODE_NOWAIT is set for a file, we're golden */
+ if (req->flags & REQ_F_SUPPORT_NOWAIT)
+ return true;
+ /* No FMODE_NOWAIT, if we can poll, check the status */
+ if (io_file_can_poll(req)) {
+ struct poll_table_struct pt = { ._key = mask };
+
+ return vfs_poll(req->file, &pt) & mask;
+ }
+ /* No FMODE_NOWAIT support, and file isn't pollable. Tough luck. */
+ return false;
}
#ifdef CONFIG_COMPAT
@@ -320,22 +330,21 @@ static int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe
{
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
struct io_ring_ctx *ctx = req->ctx;
+ struct io_rsrc_node *node;
struct io_async_rw *io;
- u16 index;
int ret;
ret = io_prep_rw(req, sqe, ddir, false);
if (unlikely(ret))
return ret;
- if (unlikely(req->buf_index >= ctx->nr_user_bufs))
+ node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
+ if (!node)
return -EFAULT;
- index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
- req->imu = ctx->user_bufs[index];
- io_req_set_rsrc_node(req, ctx, 0);
+ io_req_assign_buf_node(req, node);
io = req->async_data;
- ret = io_import_fixed(ddir, &io->iter, req->imu, rw->addr, rw->len);
+ ret = io_import_fixed(ddir, &io->iter, node->buf, rw->addr, rw->len);
iov_iter_save_state(&io->iter, &io->iter_state);
return ret;
}
@@ -425,7 +434,7 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
* Play it safe and assume not safe to re-import and reissue if we're
* not in the original thread group (or in task context).
*/
- if (!same_thread_group(req->task, current) || !in_task())
+ if (!same_thread_group(req->tctx->task, current) || !in_task())
return false;
return true;
}
@@ -796,8 +805,8 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
* supports async. Otherwise it's impossible to use O_NONBLOCK files
* reliably. If not, or it IOCB_NOWAIT is set, don't retry.
*/
- if ((kiocb->ki_flags & IOCB_NOWAIT) ||
- ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req)))
+ if (kiocb->ki_flags & IOCB_NOWAIT ||
+ ((file->f_flags & O_NONBLOCK && !(req->flags & REQ_F_SUPPORT_NOWAIT))))
req->flags |= REQ_F_NOWAIT;
if (ctx->flags & IORING_SETUP_IOPOLL) {
@@ -808,6 +817,11 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
kiocb->ki_flags |= IOCB_HIPRI;
kiocb->ki_complete = io_complete_rw_iopoll;
req->iopoll_completed = 0;
+ if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) {
+ /* make sure every req only blocks once*/
+ req->flags &= ~REQ_F_IOPOLL_STATE;
+ req->iopoll_start = ktime_get_ns();
+ }
} else {
if (kiocb->ki_flags & IOCB_HIPRI)
return -EINVAL;
@@ -838,7 +852,7 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
if (force_nonblock) {
/* If the file doesn't support async, just async punt */
- if (unlikely(!io_file_supports_nowait(req)))
+ if (unlikely(!io_file_supports_nowait(req, EPOLLIN)))
return -EAGAIN;
kiocb->ki_flags |= IOCB_NOWAIT;
} else {
@@ -952,13 +966,6 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
ret = __io_read(req, issue_flags);
/*
- * If the file doesn't support proper NOWAIT, then disable multishot
- * and stay in single shot mode.
- */
- if (!io_file_supports_nowait(req))
- req->flags &= ~REQ_F_APOLL_MULTISHOT;
-
- /*
* If we get -EAGAIN, recycle our buffer and just let normal poll
* handling arm it.
*/
@@ -972,14 +979,15 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
if (issue_flags & IO_URING_F_MULTISHOT)
return IOU_ISSUE_SKIP_COMPLETE;
return -EAGAIN;
- }
-
- /*
- * Any successful return value will keep the multishot read armed.
- */
- if (ret > 0 && req->flags & REQ_F_APOLL_MULTISHOT) {
+ } else if (ret <= 0) {
+ io_kbuf_recycle(req, issue_flags);
+ if (ret < 0)
+ req_set_fail(req);
+ } else {
/*
- * Put our buffer and post a CQE. If we fail to post a CQE, then
+ * Any successful return value will keep the multishot read
+ * armed, if it's still set. Put our buffer and post a CQE. If
+ * we fail to post a CQE, or multishot is no longer set, then
* jump to the termination path. This request is then done.
*/
cflags = io_put_kbuf(req, ret, issue_flags);
@@ -1010,6 +1018,25 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
return IOU_OK;
}
+static bool io_kiocb_start_write(struct io_kiocb *req, struct kiocb *kiocb)
+{
+ struct inode *inode;
+ bool ret;
+
+ if (!(req->flags & REQ_F_ISREG))
+ return true;
+ if (!(kiocb->ki_flags & IOCB_NOWAIT)) {
+ kiocb_start_write(kiocb);
+ return true;
+ }
+
+ inode = file_inode(kiocb->ki_filp);
+ ret = sb_start_write_trylock(inode->i_sb);
+ if (ret)
+ __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
+ return ret;
+}
+
int io_write(struct io_kiocb *req, unsigned int issue_flags)
{
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
@@ -1026,7 +1053,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
if (force_nonblock) {
/* If the file doesn't support async, just async punt */
- if (unlikely(!io_file_supports_nowait(req)))
+ if (unlikely(!io_file_supports_nowait(req, EPOLLOUT)))
goto ret_eagain;
/* Check if we can support NOWAIT. */
@@ -1047,8 +1074,8 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
if (unlikely(ret))
return ret;
- if (req->flags & REQ_F_ISREG)
- kiocb_start_write(kiocb);
+ if (unlikely(!io_kiocb_start_write(req, kiocb)))
+ return -EAGAIN;
kiocb->ki_flags |= IOCB_WRITE;
if (likely(req->file->f_op->write_iter))
@@ -1112,6 +1139,78 @@ void io_rw_fail(struct io_kiocb *req)
io_req_set_res(req, res, req->cqe.flags);
}
+static int io_uring_classic_poll(struct io_kiocb *req, struct io_comp_batch *iob,
+ unsigned int poll_flags)
+{
+ struct file *file = req->file;
+
+ if (req->opcode == IORING_OP_URING_CMD) {
+ struct io_uring_cmd *ioucmd;
+
+ ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
+ return file->f_op->uring_cmd_iopoll(ioucmd, iob, poll_flags);
+ } else {
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+
+ return file->f_op->iopoll(&rw->kiocb, iob, poll_flags);
+ }
+}
+
+static u64 io_hybrid_iopoll_delay(struct io_ring_ctx *ctx, struct io_kiocb *req)
+{
+ struct hrtimer_sleeper timer;
+ enum hrtimer_mode mode;
+ ktime_t kt;
+ u64 sleep_time;
+
+ if (req->flags & REQ_F_IOPOLL_STATE)
+ return 0;
+
+ if (ctx->hybrid_poll_time == LLONG_MAX)
+ return 0;
+
+ /* Using half the running time to do schedule */
+ sleep_time = ctx->hybrid_poll_time / 2;
+
+ kt = ktime_set(0, sleep_time);
+ req->flags |= REQ_F_IOPOLL_STATE;
+
+ mode = HRTIMER_MODE_REL;
+ hrtimer_setup_sleeper_on_stack(&timer, CLOCK_MONOTONIC, mode);
+ hrtimer_set_expires(&timer.timer, kt);
+ set_current_state(TASK_INTERRUPTIBLE);
+ hrtimer_sleeper_start_expires(&timer, mode);
+
+ if (timer.task)
+ io_schedule();
+
+ hrtimer_cancel(&timer.timer);
+ __set_current_state(TASK_RUNNING);
+ destroy_hrtimer_on_stack(&timer.timer);
+ return sleep_time;
+}
+
+static int io_uring_hybrid_poll(struct io_kiocb *req,
+ struct io_comp_batch *iob, unsigned int poll_flags)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ u64 runtime, sleep_time;
+ int ret;
+
+ sleep_time = io_hybrid_iopoll_delay(ctx, req);
+ ret = io_uring_classic_poll(req, iob, poll_flags);
+ runtime = ktime_get_ns() - req->iopoll_start - sleep_time;
+
+ /*
+ * Use minimum sleep time if we're polling devices with different
+ * latencies. We could get more completions from the faster ones.
+ */
+ if (ctx->hybrid_poll_time > runtime)
+ ctx->hybrid_poll_time = runtime;
+
+ return ret;
+}
+
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
{
struct io_wq_work_node *pos, *start, *prev;
@@ -1128,7 +1227,6 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
wq_list_for_each(pos, start, &ctx->iopoll_list) {
struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
- struct file *file = req->file;
int ret;
/*
@@ -1139,29 +1237,23 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
if (READ_ONCE(req->iopoll_completed))
break;
- if (req->opcode == IORING_OP_URING_CMD) {
- struct io_uring_cmd *ioucmd;
-
- ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
- ret = file->f_op->uring_cmd_iopoll(ioucmd, &iob,
- poll_flags);
- } else {
- struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+ if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL)
+ ret = io_uring_hybrid_poll(req, &iob, poll_flags);
+ else
+ ret = io_uring_classic_poll(req, &iob, poll_flags);
- ret = file->f_op->iopoll(&rw->kiocb, &iob, poll_flags);
- }
if (unlikely(ret < 0))
return ret;
else if (ret)
poll_flags |= BLK_POLL_ONESHOT;
/* iopoll may have completed current req */
- if (!rq_list_empty(iob.req_list) ||
+ if (!rq_list_empty(&iob.req_list) ||
READ_ONCE(req->iopoll_completed))
break;
}
- if (!rq_list_empty(iob.req_list))
+ if (!rq_list_empty(&iob.req_list))
iob.complete(&iob);
else if (!pos)
return 0;