Please comment.
To make bridge and bonding support netpoll, we need to adjust
some netpoll generic code. This patch does the following things:
1) introduce two new priv_flags for struct net_device:
IFF_IN_NETPOLL, which indicates that we are processing a netpoll request;
IFF_DISABLE_NETPOLL is used to disable netpoll support for a device
at run-time;
2) introduce three new methods for netdev_ops:
->ndo_netpoll_setup() is used to setup netpoll for a device;
->ndo_netpoll_xmit() is used to transmit netpoll requests;
->ndo_netpoll_cleanup() is used to clean up netpoll when a device is
removed.
3) introduce netpoll_poll_dev() which takes a struct net_device * parameter;
4) export netpoll_send_skb() and netpoll_poll_dev() which will be used later;
5) hide a pointer to struct netpoll in struct netpoll_info, ditto.
Cc: David Miller <da...@davemloft.net>
Cc: Neil Horman <nho...@tuxdriver.com>
Signed-off-by: WANG Cong <amw...@redhat.com>
---
Index: linux-2.6/include/linux/if.h
===================================================================
--- linux-2.6.orig/include/linux/if.h
+++ linux-2.6/include/linux/if.h
@@ -71,6 +71,8 @@
* release skb->dst
*/
#define IFF_DONT_BRIDGE 0x800 /* disallow bridging this ether dev */
+#define IFF_IN_NETPOLL 0x1000 /* whether we are processing netpoll */
+#define IFF_DISABLE_NETPOLL 0x2000 /* disable netpoll at run-time */
#define IF_GET_IFACE 0x0001 /* for querying only */
#define IF_GET_PROTO 0x0002
Index: linux-2.6/include/linux/netdevice.h
===================================================================
--- linux-2.6.orig/include/linux/netdevice.h
+++ linux-2.6/include/linux/netdevice.h
@@ -530,6 +530,8 @@ struct netdev_queue {
unsigned long tx_dropped;
} ____cacheline_aligned_in_smp;
+struct netpoll;
+struct netpoll_info;
/*
* This structure defines the management hooks for network devices.
@@ -667,6 +669,12 @@ struct net_device_ops {
unsigned short vid);
#ifdef CONFIG_NET_POLL_CONTROLLER
void (*ndo_poll_controller)(struct net_device *dev);
+ void (*ndo_netpoll_setup)(struct net_device *dev,
+ struct netpoll_info *npinfo);
+ int (*ndo_netpoll_xmit)(struct netpoll *np,
+ struct sk_buff *skb,
+ struct net_device *dev);
+ void (*ndo_netpoll_cleanup)(struct net_device *dev);
#endif
int (*ndo_set_vf_mac)(struct net_device *dev,
int queue, u8 *mac);
Index: linux-2.6/include/linux/netpoll.h
===================================================================
--- linux-2.6.orig/include/linux/netpoll.h
+++ linux-2.6/include/linux/netpoll.h
@@ -36,8 +36,11 @@ struct netpoll_info {
struct sk_buff_head txq;
struct delayed_work tx_work;
+
+ struct netpoll *netpoll;
};
+void netpoll_poll_dev(struct net_device *dev);
void netpoll_poll(struct netpoll *np);
void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
void netpoll_print_options(struct netpoll *np);
@@ -47,6 +50,7 @@ int netpoll_trap(void);
void netpoll_set_trap(int trap);
void netpoll_cleanup(struct netpoll *np);
int __netpoll_rx(struct sk_buff *skb);
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
#ifdef CONFIG_NETPOLL
Index: linux-2.6/net/core/netpoll.c
===================================================================
--- linux-2.6.orig/net/core/netpoll.c
+++ linux-2.6/net/core/netpoll.c
@@ -178,9 +178,8 @@ static void service_arp_queue(struct net
}
}
-void netpoll_poll(struct netpoll *np)
+void netpoll_poll_dev(struct net_device *dev)
{
- struct net_device *dev = np->dev;
const struct net_device_ops *ops;
if (!dev || !netif_running(dev))
@@ -200,6 +199,13 @@ void netpoll_poll(struct netpoll *np)
zap_completion_queue();
}
+void netpoll_poll(struct netpoll *np)
+{
+ if (!np->dev)
+ return;
+ netpoll_poll_dev(np->dev);
+}
+
static void refill_skbs(void)
{
struct sk_buff *skb;
@@ -281,7 +287,7 @@ static int netpoll_owner_active(struct n
return 0;
}
-static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
int status = NETDEV_TX_BUSY;
unsigned long tries;
@@ -307,7 +313,10 @@ static void netpoll_send_skb(struct netp
tries > 0; --tries) {
if (__netif_tx_trylock(txq)) {
if (!netif_tx_queue_stopped(txq)) {
- status = ops->ndo_start_xmit(skb, dev);
+ if (ops->ndo_netpoll_xmit)
+ status = ops->ndo_netpoll_xmit(np, skb, dev);
+ else
+ status = ops->ndo_start_xmit(skb, dev);
if (status == NETDEV_TX_OK)
txq_trans_update(txq);
}
@@ -752,7 +761,10 @@ int netpoll_setup(struct netpoll *np)
atomic_inc(&npinfo->refcnt);
}
- if (!ndev->netdev_ops->ndo_poll_controller) {
+ npinfo->netpoll = np;
+
+ if (ndev->priv_flags & IFF_DISABLE_NETPOLL
+ || !ndev->netdev_ops->ndo_poll_controller) {
printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
np->name, np->dev_name);
err = -ENOTSUPP;
@@ -830,6 +842,9 @@ int netpoll_setup(struct netpoll *np)
/* last thing to do is link it to the net device structure */
ndev->npinfo = npinfo;
+ if (ndev->netdev_ops->ndo_netpoll_setup)
+ ndev->netdev_ops->ndo_netpoll_setup(ndev, npinfo);
+
/* avoid racing with NAPI reading npinfo */
synchronize_rcu();
@@ -904,6 +919,7 @@ void netpoll_set_trap(int trap)
atomic_dec(&trapped);
}
+EXPORT_SYMBOL(netpoll_send_skb);
EXPORT_SYMBOL(netpoll_set_trap);
EXPORT_SYMBOL(netpoll_trap);
EXPORT_SYMBOL(netpoll_print_options);
@@ -911,4 +927,5 @@ EXPORT_SYMBOL(netpoll_parse_options);
EXPORT_SYMBOL(netpoll_setup);
EXPORT_SYMBOL(netpoll_cleanup);
EXPORT_SYMBOL(netpoll_send_udp);
+EXPORT_SYMBOL(netpoll_poll_dev);
EXPORT_SYMBOL(netpoll_poll);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majo...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
1) implement the 4 methods to support netpoll for bridge;
2) modify netpoll while forwarding packets in the bridge;
3) disable netpoll support on the bridge when a device that does not
support netpoll is added to the bridge;
4) enable netpoll support when all underlying devices support netpoll.
Cc: David Miller <da...@davemloft.net>
Cc: Neil Horman <nho...@tuxdriver.com>
Cc: Stephen Hemminger <shemm...@linux-foundation.org>
Signed-off-by: WANG Cong <amw...@redhat.com>
---
Index: linux-2.6/net/bridge/br_device.c
===================================================================
--- linux-2.6.orig/net/bridge/br_device.c
+++ linux-2.6/net/bridge/br_device.c
@@ -13,6 +13,7 @@
#include <linux/kernel.h>
#include <linux/netdevice.h>
+#include <linux/netpoll.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
@@ -162,6 +163,87 @@ static int br_set_tx_csum(struct net_dev
return 0;
}
+#ifdef CONFIG_NET_POLL_CONTROLLER
+bool br_devices_support_netpoll(struct net_bridge *br)
+{
+ struct net_bridge_port *p;
+ bool ret = true;
+ int count = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&br->lock, flags);
+ list_for_each_entry(p, &br->port_list, list) {
+ count++;
+ if (p->dev->priv_flags & IFF_DISABLE_NETPOLL
+ || !p->dev->netdev_ops->ndo_poll_controller)
+ ret = false;
+ }
+ spin_unlock_irqrestore(&br->lock, flags);
+ return count != 0 && ret;
+}
+
+static void br_poll_controller(struct net_device *br_dev)
+{
+ struct net_bridge *br = netdev_priv(br_dev);
+ struct net_bridge_port *p;
+ unsigned long flags;
+
+ spin_lock_irqsave(&br->lock, flags);
+ list_for_each_entry(p, &br->port_list, list) {
+ if (p->dev->netdev_ops->ndo_poll_controller)
+ netpoll_poll_dev(p->dev);
+ }
+ spin_unlock_irqrestore(&br->lock, flags);
+}
+
+static void br_netpoll_setup(struct net_device *br_dev, struct netpoll_info *npinfo)
+{
+ struct net_bridge *br = netdev_priv(br_dev);
+ struct net_bridge_port *p;
+ unsigned long flags;
+
+ spin_lock_irqsave(&br->lock, flags);
+ list_for_each_entry(p, &br->port_list, list) {
+ if (p->dev)
+ p->dev->npinfo = npinfo;
+ }
+ spin_unlock_irqrestore(&br->lock, flags);
+}
+
+static void br_netpoll_cleanup(struct net_device *br_dev)
+{
+ struct net_bridge *br = netdev_priv(br_dev);
+ struct net_bridge_port *p;
+ const struct net_device_ops *ops;
+ unsigned long flags;
+
+ spin_lock_irqsave(&br->lock, flags);
+ br->dev->npinfo = NULL;
+ list_for_each_entry(p, &br->port_list, list) {
+ if (p->dev) {
+ ops = p->dev->netdev_ops;
+ if (ops->ndo_netpoll_cleanup)
+ ops->ndo_netpoll_cleanup(p->dev);
+ else
+ p->dev->npinfo = NULL;
+ }
+ }
+ spin_unlock_irqrestore(&br->lock, flags);
+}
+
+static int br_netpoll_xmit(struct netpoll *np, struct sk_buff *skb, struct net_device *dev)
+{
+ int ret;
+
+ dev->priv_flags |= IFF_IN_NETPOLL;
+ ret = dev->netdev_ops->ndo_start_xmit(skb, dev);
+ np->dev = dev;
+ dev->priv_flags &= ~IFF_IN_NETPOLL;
+ return ret;
+}
+
+#endif
+
static const struct ethtool_ops br_ethtool_ops = {
.get_drvinfo = br_getinfo,
.get_link = ethtool_op_get_link,
@@ -184,6 +266,12 @@ static const struct net_device_ops br_ne
.ndo_set_multicast_list = br_dev_set_multicast_list,
.ndo_change_mtu = br_change_mtu,
.ndo_do_ioctl = br_dev_ioctl,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_netpoll_setup = br_netpoll_setup,
+ .ndo_netpoll_xmit = br_netpoll_xmit,
+ .ndo_netpoll_cleanup = br_netpoll_cleanup,
+ .ndo_poll_controller = br_poll_controller,
+#endif
};
void br_dev_setup(struct net_device *dev)
Index: linux-2.6/net/bridge/br_forward.c
===================================================================
--- linux-2.6.orig/net/bridge/br_forward.c
+++ linux-2.6/net/bridge/br_forward.c
@@ -14,6 +14,7 @@
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
+#include <linux/netpoll.h>
#include <linux/skbuff.h>
#include <linux/if_vlan.h>
#include <linux/netfilter_bridge.h>
@@ -44,7 +45,13 @@ int br_dev_queue_push_xmit(struct sk_buf
else {
skb_push(skb, ETH_HLEN);
- dev_queue_xmit(skb);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ if (skb->dev->priv_flags & IFF_IN_NETPOLL) {
+ netpoll_send_skb(skb->dev->npinfo->netpoll, skb);
+ skb->dev->priv_flags &= ~IFF_IN_NETPOLL;
+ } else
+#endif
+ dev_queue_xmit(skb);
}
}
@@ -60,6 +67,16 @@ int br_forward_finish(struct sk_buff *sk
static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
{
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ struct net_bridge *br = to->br;
+ if (br->dev->priv_flags & IFF_IN_NETPOLL) {
+ skb->dev->npinfo->netpoll->dev = to->dev;
+ if (!to->dev->npinfo)
+ to->dev->npinfo = skb->dev->npinfo;
+
+ to->dev->priv_flags |= IFF_IN_NETPOLL;
+ }
+#endif
skb->dev = to->dev;
NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
br_forward_finish);
Index: linux-2.6/net/bridge/br_if.c
===================================================================
--- linux-2.6.orig/net/bridge/br_if.c
+++ linux-2.6/net/bridge/br_if.c
@@ -19,6 +19,7 @@
#include <linux/init.h>
#include <linux/rtnetlink.h>
#include <linux/if_ether.h>
+#include <linux/netpoll.h>
#include <net/sock.h>
#include "br_private.h"
@@ -152,6 +153,14 @@ static void del_nbp(struct net_bridge_po
kobject_uevent(&p->kobj, KOBJ_REMOVE);
kobject_del(&p->kobj);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ if (br_devices_support_netpoll(br))
+ br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+ if (br->dev->netdev_ops->ndo_netpoll_cleanup)
+ br->dev->netdev_ops->ndo_netpoll_cleanup(br->dev);
+ else
+ dev->npinfo = NULL;
+#endif
call_rcu(&p->rcu, destroy_nbp_rcu);
}
@@ -437,6 +446,20 @@ int br_add_if(struct net_bridge *br, str
kobject_uevent(&p->kobj, KOBJ_ADD);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ if (br_devices_support_netpoll(br)) {
+ br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+ if (br->dev->npinfo)
+ dev->npinfo = br->dev->npinfo;
+ } else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) {
+ br->dev->priv_flags |= IFF_DISABLE_NETPOLL;
+ printk(KERN_INFO "New device %s does not support netpoll\n",
+ dev->name);
+ printk(KERN_INFO "Disabling netpoll for %s\n",
+ br->dev->name);
+ }
+#endif
+
return 0;
err2:
br_fdb_delete_by_port(br, p, 1);
Index: linux-2.6/net/bridge/br_private.h
===================================================================
--- linux-2.6.orig/net/bridge/br_private.h
+++ linux-2.6/net/bridge/br_private.h
@@ -225,6 +225,7 @@ static inline int br_is_root_bridge(cons
extern void br_dev_setup(struct net_device *dev);
extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
struct net_device *dev);
+extern bool br_devices_support_netpoll(struct net_bridge *br);
/* br_fdb.c */
extern int br_fdb_init(void);
Not sure if this is the right thing to do. Shouldn't we simply enable
polling on all devices that support it and warn about the others (aka
best effort)?
--
http://selenic.com : development and support for Mercurial and Linux
Ages ago, Jeff Moyer took a run at this, added him to the cc: on the off
chance he still cares.
> Please comment.
>
>
> To make bridge and bonding support netpoll, we need to adjust
> some netpoll generic code. This patch does the following things:
>
> 1) introduce two new priv_flags for struct net_device:
> IFF_IN_NETPOLL which identifies we are processing a netpoll;
> IFF_DISABLE_NETPOLL is used to disable netpoll support for a device
> at run-time;
This one is a little worrisome. I've tried to keep the netpoll code
restricted to as tight an area as possible. Adding new flags like these
that random drivers might try to fiddle with seems like a good way for a
driver writer to get in trouble. Also flag space is filling up.
> 2) introduce three new methods for netdev_ops:
> ->ndo_netpoll_setup() is used to setup netpoll for a device;
> ->ndo_netpoll_xmit() is used to transmit netpoll requests;
> ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is
> removed.
Seems like a lot of interface for something to be used by only a couple
core drivers. Hopefully Dave has an opinion here.
--
http://selenic.com : development and support for Mercurial and Linux
I don't think it's a good idea, because we check if a device
supports netpoll by checking if it has ndo_poll_controller method.
Thanks.
Somewhat, but currently I don't have another way to replace this.
Any suggestions?
>
>> 2) introduce three new methods for netdev_ops:
>> ->ndo_netpoll_setup() is used to setup netpoll for a device;
>> ->ndo_netpoll_xmit() is used to transmit netpoll requests;
>> ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is
>> removed.
>
> Seems like a lot of interface for something to be used by only a couple
> core drivers. Hopefully Dave has an opinion here.
>
Yeah, I worry about this too, maybe we can group those methods
for netpoll together into another struct, and just put a pointer
here?
Thanks!
> Matt Mackall wrote:
>> Seems like a lot of interface for something to be used by only a
>> couple
>> core drivers. Hopefully Dave has an opinion here.
>>
>
> Yeah, I worry about this too, maybe we can group those methods
> for netpoll together into another struct, and just put a pointer
> here?
This looks like it's tackled at the wrong layer, to be honest.
Teaching all of these layers about each other's states is
going to end up being a nightmare in the end.
All of this "where is the npinfo" business can be handled
generically in net/core/dev.c I think, with none of these
callbacks.
For example, something like "if dev lacks ->npinfo, check
its master".
Another thing, I wouldn't iterate over all devices, like I
see in the bonding poll controller method. Just whichever
one supports netpoll you see first, use it and exit
immediately. Don't send it to every single port, I can't
see how that might be desirable or useful.
Uh, what? If we have 5 devices on a bridge and 4 support netpoll, then
shouldn't we just send netconsole messages to those 4 devices? Isn't
this much better than simply refusing to work?
--
http://selenic.com : development and support for Mercurial and Linux
How could you let the bridge know netpoll is not sent to
the one that doesn't support netpoll during setup? This will
be complex, I am afraid.
Thanks.
This is a good point! I haven't tried it, but it is certainly
worth a try. Ideally those callbacks can all be removed,
but I don't know if this is true practically. ;)
I will try.
>
> Another thing, I wouldn't iterate over all devices, like I
> see in the bonding poll controller method. Just whichever
> one supports netpoll you see first, use it and exit
> immediately. Don't send it to every single port, I can't
> see how that might be desirable or useful.
Yeah, for bonding case, probably. But for bridge case, I think
we still need to check all, right?
Thanks!
I thought I saw a simple loop over bridge devices at poll time in your
patch. So it should be a simple matter of skipping unsupported devices
in that loop.
But Dave thinks there are bigger problems here, so I recommend first
figuring out the architecture issues, then we can get back to the policy
issues.
--
http://selenic.com : development and support for Mercurial and Linux
> How could you let the bridge know netpoll is not sent to
> the one that doesn't support netpoll during setup? This will
> be complex, I am afraid.
Why does this matter at all?
I told you in another mail that we should do away with
these callbacks and all the crazy 'npinfo' assignments
and just do it in the generic code.
Nope, we need to check whether the target address is owned by
a device that doesn't support netpoll; simply skipping
will not work.
>
> But Dave thinks there a bigger problems here, so I recommend first
> figuring out the architecture issues, then we can get back to the policy
> issues.
>
Ok. Thanks!
> On Tue, 2010-03-23 at 12:39 +0800, Cong Wang wrote:
>> Matt Mackall wrote:
>> How could you let the bridge know netpoll is not sent to
>> the one that doesn't support netpoll during setup? This will
>> be complex, I am afraid.
>
> I thought I saw a simple loop over bridge devices at poll time in your
> patch. So it should be a simple matter of skipping unsupported devices
> in that loop.
It's because of all that "assign ->npinfo to slaves" crap he has to do
the way his patches are currently implemented.
It's basically another sign that the design is wrong.
> Yeah, for bonding case, probably. But for bridge case, I think
> we still need to check all, right?
Why? Who cares?
If it goes out one port and reaches its destination
the objective has been achieved.
Sending it out N more times achieves nothing.
Because currently we check netpoll support by ->ndo_poll_controller,
for example, tap driver doesn't have ->ndo_poll_controller now,
if I choose the target "@192.168.0.2/br0" where "192.168.0.2" is owned
by "tap0" which is managed by "br0", netconsole may not work.
>
> I told you in another mail that we should do away with
> these callbacks and all the crazy 'npinfo' assignments
> and just do it in the generic code.
I think ->ndo_poll_controller is not in the case that you talked about.
Thanks.
We have to check which port has the right destination.
Ideally we should check the right destination address to
choose the port, but currently we don't have a generic
way to check this, thus I chose to send it to all ports.
You are right, this needs to be improved.
Thanks!
> On Mon, 2010-03-22 at 04:17 -0400, Amerigo Wang wrote:
>> This whole patchset is for adding netpoll support to bridge and bonding
>> devices. I already tested it for bridge, bonding, bridge over bonding,
>> and bonding over bridge. It looks fine now.
>
> Ages ago, Jeff Moyer took a run at this, added him to the cc: on the off
> chance he still cares.
I'll take a look at it in a bit. For now, here is the link to my
original post on this for Amerigo's reading pleasure:
http://lkml.indiana.edu/hypermail/linux/kernel/0507.0/0206.html
Cheers,
Jeff
Thanks, Jeff! I will take a look at it.