Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[PATCH net-next 0/6] net device rx busy polling support in vhost_net

30 views
Skip to first unread message

Jason Wang

unread,
Mar 31, 2016, 2:00:07 AM3/31/16
to
Hi all:

This series tries to add net device rx busy polling support in
vhost_net. This is done through:

- adding socket rx busy polling support for tun/macvtap by marking
napi_id.
- vhost_net will try to find the net device through napi_id and do
busy polling if possible.

TCP_RR tests on two mlx4s show some improvements:

smp=1 queue=1
size/session/+thu%/+normalize%/+tpkts%/+rpkts%/+ioexits%/
1/ 1/ +4%/ +3%/ +3%/ +3%/ +22%
1/ 50/ +2%/ +2%/ +2%/ +2%/ 0%
1/ 100/ +1%/ 0%/ +1%/ +1%/ -1%
1/ 200/ +2%/ +1%/ +2%/ +2%/ 0%
64/ 1/ +1%/ +3%/ +1%/ +1%/ +1%
64/ 50/ 0%/ 0%/ 0%/ 0%/ -1%
64/ 100/ +1%/ 0%/ +1%/ +1%/ 0%
64/ 200/ 0%/ 0%/ +2%/ +2%/ 0%
256/ 1/ +2%/ +2%/ +2%/ +2%/ +2%
256/ 50/ +3%/ +3%/ +3%/ +3%/ 0%
256/ 100/ +1%/ +1%/ +2%/ +2%/ 0%
256/ 200/ 0%/ 0%/ +1%/ +1%/ +1%
1024/ 1/ +2%/ +2%/ +2%/ +2%/ +2%
1024/ 50/ -1%/ -1%/ -1%/ -1%/ -2%
1024/ 100/ +1%/ +1%/ 0%/ 0%/ -1%
1024/ 200/ +2%/ +1%/ +2%/ +2%/ 0%

smp=8 queue=1
size/session/+thu%/+normalize%/+tpkts%/+rpkts%/+ioexits%/
1/ 1/ +1%/ -5%/ +1%/ +1%/ 0%
1/ 50/ +1%/ 0%/ +1%/ +1%/ -1%
1/ 100/ -1%/ -1%/ -2%/ -2%/ -4%
1/ 200/ 0%/ 0%/ 0%/ 0%/ -1%
64/ 1/ -2%/ -10%/ -2%/ -2%/ -2%
64/ 50/ -1%/ -1%/ -1%/ -1%/ -2%
64/ 100/ -1%/ 0%/ 0%/ 0%/ -1%
64/ 200/ -1%/ -1%/ 0%/ 0%/ 0%
256/ 1/ +7%/ +25%/ +7%/ +7%/ +7%
256/ 50/ +2%/ +2%/ +2%/ +2%/ -1%
256/ 100/ -1%/ -1%/ -1%/ -1%/ -3%
256/ 200/ +1%/ 0%/ 0%/ 0%/ 0%
1024/ 1/ +5%/ +15%/ +5%/ +5%/ +4%
1024/ 50/ 0%/ 0%/ -1%/ -1%/ -1%
1024/ 100/ -1%/ -1%/ -1%/ -1%/ -2%
1024/ 200/ -1%/ 0%/ -1%/ -1%/ -1%

smp=8 queue=8
size/session/+thu%/+normalize%/+tpkts%/+rpkts%/+ioexits%/
1/ 1/ +5%/ +2%/ +5%/ +5%/ 0%
1/ 50/ +2%/ +2%/ +3%/ +3%/ -20%
1/ 100/ +5%/ +5%/ +5%/ +5%/ -13%
1/ 200/ +8%/ +8%/ +6%/ +6%/ -12%
64/ 1/ 0%/ +4%/ 0%/ 0%/ +18%
64/ 50/ +6%/ +5%/ +5%/ +5%/ -7%
64/ 100/ +4%/ +4%/ +5%/ +5%/ -12%
64/ 200/ +5%/ +5%/ +5%/ +5%/ -12%
256/ 1/ 0%/ -3%/ 0%/ 0%/ +1%
256/ 50/ +3%/ +3%/ +3%/ +3%/ -2%
256/ 100/ +6%/ +5%/ +5%/ +5%/ -11%
256/ 200/ +4%/ +4%/ +4%/ +4%/ -13%
1024/ 1/ 0%/ -3%/ 0%/ 0%/ -6%
1024/ 50/ +1%/ +1%/ +1%/ +1%/ -10%
1024/ 100/ +4%/ +4%/ +5%/ +5%/ -11%
1024/ 200/ +4%/ +5%/ +4%/ +4%/ -12%

Thanks

Jason Wang (6):
net: skbuff: don't use union for napi_id and sender_cpu
tuntap: socket rx busy polling support
macvtap: socket rx busy polling support
net: core: factor out core busy polling logic to sk_busy_loop_once()
net: export napi_by_id()
vhost_net: net device rx busy polling support

drivers/net/macvtap.c | 4 ++++
drivers/net/tun.c | 3 ++-
drivers/vhost/net.c | 22 ++++++++++++++++--
include/linux/skbuff.h | 10 ++++----
include/net/busy_poll.h | 8 +++++++
net/core/dev.c | 62 ++++++++++++++++++++++++++++---------------------
6 files changed, 75 insertions(+), 34 deletions(-)

--
2.5.0

Jason Wang

unread,
Mar 31, 2016, 2:00:08 AM3/31/16
to
This patch exports napi_by_id() which will be used by vhost_net socket
busy polling.

Signed-off-by: Jason Wang <jaso...@redhat.com>
---
include/net/busy_poll.h | 1 +
net/core/dev.c | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index e765e23..dc9c76d 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -74,6 +74,7 @@ static inline bool busy_loop_timeout(unsigned long end_time)

bool sk_busy_loop(struct sock *sk, int nonblock);
int sk_busy_loop_once(struct sock *sk, struct napi_struct *napi);
+struct napi_struct *napi_by_id(unsigned int napi_id);

/* used in the NIC receive handler to mark the skb */
static inline void skb_mark_napi_id(struct sk_buff *skb,
diff --git a/net/core/dev.c b/net/core/dev.c
index a2f0c46..b98d210 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4888,7 +4888,7 @@ void napi_complete_done(struct napi_struct *n, int work_done)
EXPORT_SYMBOL(napi_complete_done);

/* must be called under rcu_read_lock(), as we dont take a reference */
-static struct napi_struct *napi_by_id(unsigned int napi_id)
+struct napi_struct *napi_by_id(unsigned int napi_id)
{
unsigned int hash = napi_id % HASH_SIZE(napi_hash);
struct napi_struct *napi;
@@ -4899,6 +4899,7 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)

return NULL;
}
+EXPORT_SYMBOL(napi_by_id);

#if defined(CONFIG_NET_RX_BUSY_POLL)
#define BUSY_POLL_BUDGET 8
--
2.5.0

Jason Wang

unread,
Mar 31, 2016, 2:00:08 AM3/31/16
to
This patch lets vhost_net try rx busy polling of the underlying net
device when busy polling is enabled. Tests show some improvement on
TCP_RR.

Signed-off-by: Jason Wang <jaso...@redhat.com>
---
drivers/vhost/net.c | 22 ++++++++++++++++++++--
1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index f744eeb..7350f6c 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -27,6 +27,7 @@
#include <linux/if_vlan.h>

#include <net/sock.h>
+#include <net/busy_poll.h>

#include "vhost.h"

@@ -307,15 +308,24 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
unsigned int *out_num, unsigned int *in_num)
{
unsigned long uninitialized_var(endtime);
+ struct socket *sock = vq->private_data;
+ struct sock *sk = sock->sk;
+ struct napi_struct *napi;
int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
out_num, in_num, NULL, NULL);

if (r == vq->num && vq->busyloop_timeout) {
preempt_disable();
+ rcu_read_lock();
+ napi = napi_by_id(sk->sk_napi_id);
endtime = busy_clock() + vq->busyloop_timeout;
while (vhost_can_busy_poll(vq->dev, endtime) &&
- vhost_vq_avail_empty(vq->dev, vq))
+ vhost_vq_avail_empty(vq->dev, vq)) {
+ if (napi)
+ sk_busy_loop_once(sk, napi);
cpu_relax_lowlatency();
+ }
+ rcu_read_unlock();
preempt_enable();
r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
out_num, in_num, NULL, NULL);
@@ -476,6 +486,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
struct vhost_virtqueue *vq = &nvq->vq;
unsigned long uninitialized_var(endtime);
+ struct napi_struct *napi;
int len = peek_head_len(sk);

if (!len && vq->busyloop_timeout) {
@@ -484,13 +495,20 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
vhost_disable_notify(&net->dev, vq);

preempt_disable();
+ rcu_read_lock();
+
+ napi = napi_by_id(sk->sk_napi_id);
endtime = busy_clock() + vq->busyloop_timeout;

while (vhost_can_busy_poll(&net->dev, endtime) &&
skb_queue_empty(&sk->sk_receive_queue) &&
- vhost_vq_avail_empty(&net->dev, vq))
+ vhost_vq_avail_empty(&net->dev, vq)) {
+ if (napi)
+ sk_busy_loop_once(sk, napi);
cpu_relax_lowlatency();
+ }

+ rcu_read_unlock();
preempt_enable();

if (vhost_enable_notify(&net->dev, vq))
--
2.5.0

Jason Wang

unread,
Mar 31, 2016, 2:00:09 AM3/31/16
to
This patch factors out core logic of busy polling to
sk_busy_loop_once() in order to be reused by other modules.

Signed-off-by: Jason Wang <jaso...@redhat.com>
---
include/net/busy_poll.h | 7 ++++++
net/core/dev.c | 59 ++++++++++++++++++++++++++++---------------------
2 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 2fbeb13..e765e23 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -73,6 +73,7 @@ static inline bool busy_loop_timeout(unsigned long end_time)
}

bool sk_busy_loop(struct sock *sk, int nonblock);
+int sk_busy_loop_once(struct sock *sk, struct napi_struct *napi);

/* used in the NIC receive handler to mark the skb */
static inline void skb_mark_napi_id(struct sk_buff *skb,
@@ -117,6 +118,12 @@ static inline bool busy_loop_timeout(unsigned long end_time)
return true;
}

+static inline int sk_busy_loop_once(struct sock *sk,
+ struct napi_struct *napi)
+{
+ return 0;
+}
+
static inline bool sk_busy_loop(struct sock *sk, int nonblock)
{
return false;
diff --git a/net/core/dev.c b/net/core/dev.c
index b9bcbe7..a2f0c46 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4902,10 +4902,42 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)

#if defined(CONFIG_NET_RX_BUSY_POLL)
#define BUSY_POLL_BUDGET 8
+int sk_busy_loop_once(struct sock *sk, struct napi_struct *napi)
+{
+ int (*busy_poll)(struct napi_struct *dev);
+ int rc = 0;
+
+ /* Note: ndo_busy_poll method is optional in linux-4.5 */
+ busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
+
+ local_bh_disable();
+ if (busy_poll) {
+ rc = busy_poll(napi);
+ } else if (napi_schedule_prep(napi)) {
+ void *have = netpoll_poll_lock(napi);
+
+ if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+ rc = napi->poll(napi, BUSY_POLL_BUDGET);
+ trace_napi_poll(napi);
+ if (rc == BUSY_POLL_BUDGET) {
+ napi_complete_done(napi, rc);
+ napi_schedule(napi);
+ }
+ }
+ netpoll_poll_unlock(have);
+ }
+ if (rc > 0)
+ NET_ADD_STATS_BH(sock_net(sk),
+ LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+ local_bh_enable();
+
+ return rc;
+}
+EXPORT_SYMBOL(sk_busy_loop_once);
+
bool sk_busy_loop(struct sock *sk, int nonblock)
{
unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
- int (*busy_poll)(struct napi_struct *dev);
struct napi_struct *napi;
int rc = false;

@@ -4915,31 +4947,8 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
if (!napi)
goto out;

- /* Note: ndo_busy_poll method is optional in linux-4.5 */
- busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
-
do {
- rc = 0;
- local_bh_disable();
- if (busy_poll) {
- rc = busy_poll(napi);
- } else if (napi_schedule_prep(napi)) {
- void *have = netpoll_poll_lock(napi);
-
- if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
- rc = napi->poll(napi, BUSY_POLL_BUDGET);
- trace_napi_poll(napi);
- if (rc == BUSY_POLL_BUDGET) {
- napi_complete_done(napi, rc);
- napi_schedule(napi);
- }
- }
- netpoll_poll_unlock(have);
- }
- if (rc > 0)
- NET_ADD_STATS_BH(sock_net(sk),
- LINUX_MIB_BUSYPOLLRXPACKETS, rc);
- local_bh_enable();
+ rc = sk_busy_loop_once(sk, napi);

if (rc == LL_FLUSH_FAILED)
break; /* permanent failure */
--
2.5.0

Jason Wang

unread,
Mar 31, 2016, 2:00:14 AM3/31/16
to
Signed-off-by: Jason Wang <jaso...@redhat.com>
---
drivers/net/macvtap.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 95394ed..1891aff 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -20,6 +20,7 @@
#include <net/net_namespace.h>
#include <net/rtnetlink.h>
#include <net/sock.h>
+#include <net/busy_poll.h>
#include <linux/virtio_net.h>

/*
@@ -369,6 +370,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
goto drop;

if (!segs) {
+ sk_mark_napi_id(&q->sk, skb);
skb_queue_tail(&q->sk.sk_receive_queue, skb);
goto wake_up;
}
@@ -378,6 +380,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
struct sk_buff *nskb = segs->next;

segs->next = NULL;
+ sk_mark_napi_id(&q->sk, segs);
skb_queue_tail(&q->sk.sk_receive_queue, segs);
segs = nskb;
}
@@ -391,6 +394,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
!(features & NETIF_F_CSUM_MASK) &&
skb_checksum_help(skb))
goto drop;
+ sk_mark_napi_id(&q->sk, skb);
skb_queue_tail(&q->sk.sk_receive_queue, skb);
}

--
2.5.0

Jason Wang

unread,
Mar 31, 2016, 2:00:14 AM3/31/16
to
Signed-off-by: Jason Wang <jaso...@redhat.com>
---
drivers/net/tun.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index afdf950..950faf5 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -69,6 +69,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/sock.h>
+#include <net/busy_poll.h>
#include <linux/seq_file.h>
#include <linux/uio.h>

@@ -871,6 +872,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)

nf_reset(skb);

+ sk_mark_napi_id(tfile->socket.sk, skb);
/* Enqueue packet */
skb_queue_tail(&tfile->socket.sk->sk_receive_queue, skb);

@@ -878,7 +880,6 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
if (tfile->flags & TUN_FASYNC)
kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
tfile->socket.sk->sk_data_ready(tfile->socket.sk);
-
rcu_read_unlock();
return NETDEV_TX_OK;

--
2.5.0
0 new messages