[PATCH] RPC: expose API for serializing access to RPC transports
The next method we abstract is the one that releases a transport,
allowing another task to have access to the transport.
Again, one generic version of this is provided for transports that
don't need the RPC client to perform congestion control, and one
version is for transports that can use the original Van Jacobson
implementation in xprt.c.
Test-plan:
Use WAN simulation to cause sporadic bursty packet loss. Look for
significant regression in performance or client stability.
Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index eee1c68..86833b7 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -133,6 +133,7 @@
struct rpc_xprt_ops {
void (*set_buffer_size)(struct rpc_xprt *xprt);
int (*reserve_xprt)(struct rpc_task *task);
+ void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
void (*connect)(struct rpc_task *task);
int (*send_request)(struct rpc_task *task);
void (*set_retrans_timeout)(struct rpc_task *task);
@@ -238,6 +239,8 @@
int xprt_prepare_transmit(struct rpc_task *task);
void xprt_transmit(struct rpc_task *task);
int xprt_adjust_timeout(struct rpc_rqst *req);
+void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
+void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
void xprt_release(struct rpc_task *task);
int xprt_destroy(struct rpc_xprt *xprt);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 2d1e8b8..e92ea99 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -153,14 +153,42 @@
return retval;
}
-
static void __xprt_lock_write_next(struct rpc_xprt *xprt)
{
struct rpc_task *task;
+ struct rpc_rqst *req;
+
+ if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
+ return;
+
+ task = rpc_wake_up_next(&xprt->resend);
+ if (!task) {
+ task = rpc_wake_up_next(&xprt->sending);
+ if (!task)
+ goto out_unlock;
+ }
+
+ req = task->tk_rqstp;
+ xprt->snd_task = task;
+ if (req) {
+ req->rq_bytes_sent = 0;
+ req->rq_ntrans++;
+ }
+ return;
+
+out_unlock:
+ smp_mb__before_clear_bit();
+ clear_bit(XPRT_LOCKED, &xprt->state);
+ smp_mb__after_clear_bit();
+}
+
+static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
+{
+ struct rpc_task *task;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
return;
- if (!xprt->nocong && RPCXPRT_CONGESTED(xprt))
+ if (RPCXPRT_CONGESTED(xprt))
goto out_unlock;
task = rpc_wake_up_next(&xprt->resend);
if (!task) {
@@ -168,7 +196,7 @@
if (!task)
goto out_unlock;
}
- if (xprt->nocong || __xprt_get_cong(xprt, task)) {
+ if (__xprt_get_cong(xprt, task)) {
struct rpc_rqst *req = task->tk_rqstp;
xprt->snd_task = task;
if (req) {
@@ -183,11 +211,14 @@
smp_mb__after_clear_bit();
}
-/*
- * Releases the transport for use by other requests.
+/**
+ * xprt_release_xprt - allow other requests to use a transport
+ * @xprt: transport with other tasks potentially waiting
+ * @task: task that is releasing access to the transport
+ *
+ * Note that "task" can be NULL. No congestion control is provided.
*/
-static void
-__xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
+void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
{
if (xprt->snd_task == task) {
xprt->snd_task = NULL;
@@ -198,11 +229,29 @@
}
}
-static inline void
-xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
+/**
+ * xprt_release_xprt_cong - allow other requests to use a transport
+ * @xprt: transport with other tasks potentially waiting
+ * @task: task that is releasing access to the transport
+ *
+ * Note that "task" can be NULL. Another task is awoken to use the
+ * transport if the transport's congestion window allows it.
+ */
+void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+ if (xprt->snd_task == task) {
+ xprt->snd_task = NULL;
+ smp_mb__before_clear_bit();
+ clear_bit(XPRT_LOCKED, &xprt->state);
+ smp_mb__after_clear_bit();
+ __xprt_lock_write_next_cong(xprt);
+ }
+}
+
+static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
spin_lock_bh(&xprt->transport_lock);
- __xprt_release_write(xprt, task);
+ xprt->ops->release_xprt(xprt, task);
spin_unlock_bh(&xprt->transport_lock);
}
@@ -237,7 +286,7 @@
return;
req->rq_cong = 0;
xprt->cong -= RPC_CWNDSCALE;
- __xprt_lock_write_next(xprt);
+ __xprt_lock_write_next_cong(xprt);
}
/*
@@ -256,7 +305,7 @@
cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
if (cwnd > RPC_MAXCWND(xprt))
cwnd = RPC_MAXCWND(xprt);
- __xprt_lock_write_next(xprt);
+ __xprt_lock_write_next_cong(xprt);
} else if (result == -ETIMEDOUT) {
cwnd >>= 1;
if (cwnd < RPC_CWNDSCALE)
@@ -693,7 +742,7 @@
task->tk_status = -ENOTCONN;
else if (!req->rq_received)
rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
- __xprt_release_write(xprt, task);
+ xprt->ops->release_xprt(xprt, task);
spin_unlock_bh(&xprt->transport_lock);
return;
}
@@ -792,7 +841,7 @@
if (!(req = task->tk_rqstp))
return;
spin_lock_bh(&xprt->transport_lock);
- __xprt_release_write(xprt, task);
+ xprt->ops->release_xprt(xprt, task);
__xprt_put_cong(xprt, req);
if (!list_empty(&req->rq_list))
list_del(&req->rq_list);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index fc4fbe8..8589c1a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1046,6 +1046,7 @@
static struct rpc_xprt_ops xs_udp_ops = {
.set_buffer_size = xs_udp_set_buffer_size,
.reserve_xprt = xprt_reserve_xprt_cong,
+ .release_xprt = xprt_release_xprt_cong,
.connect = xs_connect,
.send_request = xs_udp_send_request,
.set_retrans_timeout = xprt_set_retrans_timeout_rtt,
@@ -1056,6 +1057,7 @@
static struct rpc_xprt_ops xs_tcp_ops = {
.set_buffer_size = xs_tcp_set_buffer_size,
.reserve_xprt = xprt_reserve_xprt,
+ .release_xprt = xprt_release_xprt,
.connect = xs_connect,
.send_request = xs_tcp_send_request,
.set_retrans_timeout = xprt_set_retrans_timeout_def,