[PATCH 01/10] netlink: minimal Linux rtnetlink support

150 views
Skip to first unread message

Waldemar Kozaczuk

unread,
Jun 3, 2022, 9:28:46 PM6/3/22
to osv...@googlegroups.com, Waldemar Kozaczuk, Charles Myers
This first of 10 patches adds support for a minimal subset of
the rtnetlink (Linux routing socket) interface as described here -
https://man7.org/linux/man-pages/man7/rtnetlink.7.html.
rtnetlink is itself a subset of the even richer netlink interface
described here - https://man7.org/linux/man-pages/man7/netlink.7.html.
In other words, rtnetlink covers the NETLINK_ROUTE family of the broader
netlink interface.

We need rtnetlink in order to support the implementation of
if_nameindex() and getifaddrs() in modern musl 1.1.24. In addition,
Golang uses the netlink interface to discover the interfaces and IP
addresses as well.

Please note that this is an unmodified copy of two of Charles Myers' commits:
f1cd48e0f192564d64e7b1e1caccc8df05e7cd5d, except for the modifications to
bsd/net.cc which are part of the last commit, and the subset of
64a0c1affe9921e6a5a5b164edf1a544a7297393 that adds lltable_foreach()
and lltable_foreach_lle(). The next 8 much smaller patches fix various
small bugs and slightly enhance this implementation.
The last one enables the netlink support and adds a unit test.

The netlink interface is pretty rich and not very precisely documented.
I actually used a unit test to discover in more detail what the
netlink responses should look like.

In general, the application would use standard socket API to open a
socket with the domain and protocol equal to AF_NETLINK and NETLINK_ROUTE
respectively and typically use SOCK_RAW as type. Then it would
optionally bind the socket and build a request sent using standard
sendmsg(). Finally it would receive all replies from kernel using
recvmsg().

To illustrate, the incomplete code might look like this:

////////////////////////////////////////////////////////
//step 1
int s = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

//step 2
src_addr.nl_family = AF_NETLINK;
src_addr.nl_pid = pid; // if 0 kernel will assign unique id
bind(s, (struct sockaddr*) &src_addr, sizeof(src_addr))

// step 3
dst_addr.nl_family = AF_NETLINK;
dst_addr.nl_pid = 0; // should be 0 if destination is kernel

iov[0].iov_base = req;
iov[0].iov_len = req->nlmsg_len;

snd_msg.msg_iov = iov;
snd_msg.msg_iovlen = 1;
snd_msg.msg_name = &dst_addr;
snd_msg.msg_namelen = sizeof(dst_addr);

sendmsg(s, &snd_msg, 0)

//step 4
rcv_msg.msg_iov[0].iov_base = buf;
rcv_msg.msg_iov[0].iov_len = BUFSIZE;
recvmsg(s, &rcv_msg, 0)
//process replies received in buf
////////////////////////////////////////////////////////

This patch implements support of only 3 rtnetlink types of requests:
- RTM_GETLINK
- RTM_GETADDR
- RTM_GETNEIGH

The bulk of the implementation is in linux_netlink.cc and
mostly centers around the following functions:
- netlink_attach()
- netlink_bind()
- netlink_output()
- netlink_process_msg()
- netlink_process_getlink_msg()
- netlink_process_getaddr_msg()
- netlink_process_getneigh_msg()

Most other pru_* functions delegate to raw_usrreqs as is.

Authored-by: Charles Myers <Charle...@spirent.com>
Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>
---
Makefile | 1 +
bsd/sys/compat/linux/linux_netlink.cc | 904 ++++++++++++++++++++++++++
bsd/sys/compat/linux/linux_netlink.h | 175 +++++
bsd/sys/compat/linux/linux_socket.cc | 5 +
bsd/sys/compat/linux/linux_socket.h | 1 +
bsd/sys/net/if_llatbl.cc | 46 +-
bsd/sys/net/if_llatbl.h | 13 +
bsd/sys/net/netisr.h | 1 +
8 files changed, 1143 insertions(+), 3 deletions(-)
create mode 100644 bsd/sys/compat/linux/linux_netlink.cc
create mode 100644 bsd/sys/compat/linux/linux_netlink.h

diff --git a/Makefile b/Makefile
index 19a4571b..2d1ba6a8 100644
--- a/Makefile
+++ b/Makefile
@@ -593,6 +593,7 @@ bsd += bsd/porting/bus_dma.o
bsd += bsd/sys/netinet/if_ether.o
bsd += bsd/sys/compat/linux/linux_socket.o
bsd += bsd/sys/compat/linux/linux_ioctl.o
+bsd += bsd/sys/compat/linux/linux_netlink.o
bsd += bsd/sys/net/if_ethersubr.o
bsd += bsd/sys/net/if_llatbl.o
bsd += bsd/sys/net/radix.o
diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
new file mode 100644
index 00000000..bc02bb7f
--- /dev/null
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -0,0 +1,904 @@
+/*
+ * Linux NETLINK socket implementation.
+ *
+ * NETLINK is used to support IPv4/IPv6 LIBC getifaddrs(), if_nameindex().
+ *
+ * Warning: Tx/Rx messages are compatible with Linux not FreeBSD.
+ */
+
+#include <osv/initialize.hh>
+#include <bsd/porting/netport.h>
+
+#include <bsd/sys/sys/param.h>
+#include <bsd/sys/sys/domain.h>
+#include <bsd/sys/sys/mbuf.h>
+#include <bsd/sys/sys/priv.h>
+#include <bsd/sys/sys/protosw.h>
+#include <bsd/sys/sys/socket.h>
+#include <bsd/sys/sys/socketvar.h>
+#include <bsd/sys/sys/sysctl.h>
+
+#include <bsd/sys/net/if.h>
+#include <bsd/sys/net/if_dl.h>
+#include <bsd/sys/net/if_llatbl.h>
+#include <bsd/sys/net/if_types.h>
+#include <bsd/sys/net/netisr.h>
+#include <bsd/sys/net/raw_cb.h>
+#include <bsd/sys/net/route.h>
+#include <bsd/sys/net/vnet.h>
+
+#include <bsd/sys/netinet/in.h>
+#include <bsd/sys/netinet/if_ether.h>
+#include <bsd/sys/net/if_llatbl.h>
+
+#ifdef INET6
+#include <bsd/sys/netinet/ip6.h>
+#include <bsd/sys/netinet6/ip6_var.h>
+#include <bsd/sys/netinet6/in6_var.h>
+#include <bsd/sys/netinet6/scope6_var.h>
+#include <bsd/sys/netinet6/nd6.h>
+#endif
+
+#include <bsd/sys/compat/linux/linux.h>
+#include <bsd/sys/compat/linux/linux_netlink.h>
+#include <bsd/sys/compat/linux/linux_socket.h>
+
+#if !defined(offsetof)
+#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
+#endif
+
+mutex netlink_mtx;
+
+#define NETLINK_LOCK() mutex_lock(&netlink_mtx)
+#define NETLINK_UNLOCK() mutex_unlock(&netlink_mtx)
+#define NETLINK_LOCK_ASSERT() assert(netlink_mtx.owned())
+
+struct bsd_sockaddr_nl { /* netlink socket address in the length-prefixed layout checked by netlink_bind() */
+ uint8_t nl_len; /* length of this struct */
+ bsd_sa_family_t nl_family; /* AF_NETLINK */
+ unsigned short nl_pad; /* Zero */
+ pid_t nl_pid; /* Port ID */
+ uint32_t nl_groups; /* Multicast groups mask */
+};
+
+MALLOC_DEFINE(M_NETLINK, "netlink", "netlink socket");
+
+static struct bsd_sockaddr netlink_src = { 2, PF_NETLINK, };
+
+
+
+/*
+ * Convert a netmask given as n_bytes raw bytes into a prefix length:
+ * the 1-based position, counted from the most significant bit of
+ * bytes[0], of the last bit that is set. An all-zero mask yields 0.
+ */
+static size_t mask_to_prefix_len(const uint8_t *bytes, size_t n_bytes)
+{
+    size_t used = n_bytes;
+
+    /* Drop trailing all-zero bytes. */
+    while (used > 0 && bytes[used - 1] == 0) {
+        --used;
+    }
+    if (used == 0) {
+        return 0;
+    }
+
+    /* Count of zero bits below the lowest set bit of the last non-zero byte. */
+    int trailing_zeros = __builtin_ffs((long)bytes[used - 1]) - 1;
+    return 8 * used - trailing_zeros;
+}
+
+/*
+ * Prefix length of the netmask stored in sa. Only AF_INET and AF_INET6
+ * (when compiled in) are handled; a NULL pointer or any other family
+ * yields 0.
+ */
+static int get_sockaddr_mask_prefix_len(struct bsd_sockaddr *sa)
+{
+    if (sa == NULL) {
+        return 0;
+    }
+
+#ifdef INET
+    if (sa->sa_family == AF_INET) {
+        struct bsd_sockaddr_in *sin = (struct bsd_sockaddr_in *)sa;
+        return mask_to_prefix_len((uint8_t *)&sin->sin_addr, sizeof(sin->sin_addr));
+    }
+#endif
+#ifdef INET6
+    if (sa->sa_family == AF_INET6) {
+        struct bsd_sockaddr_in6 *sin6 = (struct bsd_sockaddr_in6 *)sa;
+        return mask_to_prefix_len((uint8_t *)sin6->sin6_addr.s6_addr, sizeof(sin6->sin6_addr));
+    }
+#endif
+
+    return 0;
+}
+
+
+/*
+ * Reserve len bytes at the end of the mbuf chain m0 and return a pointer
+ * to the reserved area. The bytes go into the last mbuf when it has enough
+ * trailing space; otherwise a new mbuf is appended to the chain. If m0
+ * carries a packet header, its total length is increased by len.
+ * Returns NULL when a new mbuf is needed but len exceeds MLEN or the
+ * allocation fails.
+ */
+void *nl_m_put(struct mbuf *m0, int len)
+{
+    struct mbuf *tail = m0;
+    void *area;
+
+    /* Find the last mbuf of the chain. */
+    while (tail->m_hdr.mh_next != NULL) {
+        tail = tail->m_hdr.mh_next;
+    }
+
+    int room = M_TRAILINGSPACE(tail);
+    if (len > room) {
+        /* Does not fit: chain a fresh mbuf that holds only this area. */
+        if (len > MLEN) {
+            return NULL;
+        }
+        struct mbuf *fresh = m_get(M_NOWAIT, tail->m_hdr.mh_type);
+        if (fresh == NULL) {
+            return NULL;
+        }
+        fresh->m_hdr.mh_len = len;
+        tail->m_hdr.mh_next = fresh;
+        area = fresh->m_hdr.mh_data;
+    } else {
+        /* Fits: extend the last mbuf in place. */
+        area = tail->m_hdr.mh_data + tail->m_hdr.mh_len;
+        tail->m_hdr.mh_len += len;
+    }
+
+    if (m0->m_hdr.mh_flags & M_PKTHDR) {
+        m0->M_dat.MH.MH_pkthdr.len += len;
+    }
+    return area;
+}
+
+/*
+ * Append a netlink message header followed by len payload bytes to the
+ * mbuf chain m and return a pointer to the header, or NULL if the space
+ * could not be reserved.
+ *
+ * The header fields are filled from pid, seq, type and flags; nlmsg_len is
+ * set to the unaligned header + payload size. The payload and the alignment
+ * padding are zeroed so that fields the caller does not set (for example
+ * padding members of the payload struct) never carry stale mbuf contents
+ * to the receiver.
+ */
+struct nlmsghdr * nlmsg_put(struct mbuf *m, uint32_t pid, uint32_t seq, int type, int len, int flags)
+{
+    int size = nlmsg_msg_size(len);
+    int align_size = NLMSG_ALIGN(size);
+    struct nlmsghdr *nlh = (struct nlmsghdr *) nl_m_put(m, align_size);
+
+    if (!nlh)
+        return NULL;
+
+    /* Clear header, payload and padding in one go. */
+    memset(nlh, 0, align_size);
+    nlh->nlmsg_type = type;
+    nlh->nlmsg_len = size;
+    nlh->nlmsg_flags = flags;
+    nlh->nlmsg_pid = pid;
+    nlh->nlmsg_seq = seq;
+    return nlh;
+}
+
+/*
+ * Start a message whose final length is fixed up later with nlmsg_end().
+ * Currently just forwards all arguments to nlmsg_put().
+ */
+struct nlmsghdr * nlmsg_begin(struct mbuf *m, uint32_t pid, uint32_t seq, int type, int len, int flags)
+{
+    struct nlmsghdr *hdr = nlmsg_put(m, pid, seq, type, len, flags);
+    return hdr;
+}
+
+/*
+ * Finish the message started at nlh: set nlmsg_len to the number of bytes
+ * from nlh to the current end of the packet held in the chain m.
+ *
+ * nl_m_put() may have appended further mbufs, so nlh need not live in the
+ * first mbuf. The offset of nlh within the packet is therefore computed by
+ * walking the chain instead of subtracting the first mbuf's data pointer.
+ */
+void nlmsg_end(struct mbuf *m, struct nlmsghdr *nlh)
+{
+    uintptr_t target = (uintptr_t)nlh;
+    size_t offset = 0;
+    struct mbuf *cur;
+
+    for (cur = m; cur != NULL; cur = cur->m_hdr.mh_next) {
+        uintptr_t start = (uintptr_t)cur->m_hdr.mh_data;
+        if (target >= start && target < start + cur->m_hdr.mh_len) {
+            offset += target - start;
+            break;
+        }
+        /* nlh is further down the chain: skip this mbuf's bytes. */
+        offset += cur->m_hdr.mh_len;
+    }
+    if (cur == NULL) {
+        /* nlh not found in the chain: keep the single-mbuf computation. */
+        offset = target - (uintptr_t)m->m_hdr.mh_data;
+    }
+
+    nlh->nlmsg_len = m->M_dat.MH.MH_pkthdr.len - offset;
+}
+
+/*
+ * Append one netlink attribute of type attrtype carrying len bytes copied
+ * from src to the mbuf chain m. nla_len is set to the unaligned attribute
+ * size (header + payload); the attribute is padded with zero bytes up to
+ * the NLA_ALIGN boundary.
+ * Returns 0 on success or ENOMEM if the space could not be reserved.
+ */
+int nla_put(struct mbuf *m, int attrtype, int len, const void *src)
+{
+    int size = nla_attr_size(len);
+    int align_size = NLA_ALIGN(size);
+    struct nlattr *nla = (struct nlattr *)nl_m_put(m, align_size);
+
+    if (!nla)
+        return ENOMEM;
+    nla->nla_len = size;
+    nla->nla_type = attrtype;
+
+    char *dest = (char *)nla_data(nla);
+    memcpy(dest, src, len);
+    /*
+     * The padding starts right after the payload. 'size' includes the
+     * attribute header, so it must not be used as an offset from dest.
+     */
+    if (size != align_size)
+        memset(dest + len, 0, align_size - size);
+    return 0;
+}
+
+/* Append an attribute whose payload is the raw bytes of val (sizeof(T) bytes). */
+template<class T>
+int nla_put_type(struct mbuf *m, int attrtype, T val)
+{
+    const void *raw = &val;
+    return nla_put(m, attrtype, sizeof(val), raw);
+}
+
+/* Append str as an attribute payload, including its terminating NUL byte. */
+int nla_put_string(struct mbuf *m, int attrtype, const char *str)
+{
+    size_t with_nul = strlen(str) + 1;
+    return nla_put(m, attrtype, with_nul, str);
+}
+
+/*
+ * Append the address part of sa as an attribute of type attrtype.
+ *   AF_INET / AF_INET6 (when compiled in): the raw IPv4 / IPv6 address.
+ *   AF_LINK: the sdl_alen link-level address bytes that follow the
+ *            interface name inside sdl_data.
+ *   other:   the bytes of sa_data.
+ * A NULL sa is silently skipped and reported as success (0); otherwise the
+ * result of nla_put() is returned.
+ */
+int nla_put_sockaddr(struct mbuf *m, int attrtype, struct bsd_sockaddr *sa)
+{
+    void *data;
+    int data_len;
+
+    if (!sa)
+        return 0;
+
+    switch (sa->sa_family) {
+#ifdef INET
+    case AF_INET:
+        data = &((struct bsd_sockaddr_in *)sa)->sin_addr;
+        data_len = sizeof(((struct bsd_sockaddr_in *)sa)->sin_addr);
+        break;
+#endif
+#ifdef INET6
+    case AF_INET6:
+        data = ((struct bsd_sockaddr_in6 *)sa)->sin6_addr.s6_addr;
+        data_len = sizeof(((struct bsd_sockaddr_in6 *)sa)->sin6_addr);
+        break;
+#endif
+    case AF_LINK:
+        data = ((struct bsd_sockaddr_dl *)sa)->sdl_data + ((struct bsd_sockaddr_dl *)sa)->sdl_nlen;
+        data_len = ((struct bsd_sockaddr_dl *)sa)->sdl_alen;
+        break;
+    default:
+        /*
+         * sa_len is treated as the length of the whole sockaddr (see the
+         * sizeof comparison in netlink_bind()), so the bytes that precede
+         * sa_data are subtracted to avoid reading past the end of the
+         * structure.
+         */
+        data = sa->sa_data;
+        data_len = (int)sa->sa_len - (int)offsetof(struct bsd_sockaddr, sa_data);
+        if (data_len < 0)
+            data_len = 0;
+        break;
+    }
+
+    return nla_put(m, attrtype, data_len, data);
+}
+
+static int netlink_output(struct mbuf *m, struct socket *so);
+
+
+/* Currently messages are always redirected back to the socket which
+ * sent the message, so an ISR dispatch handler is not needed.
+ *
+ */
+
+static void netlink_input(struct mbuf *m);
+
+/* netisr registration record: NETISR_NETLINK packets are handled by netlink_input(). */
+static struct netisr_handler netlink_nh = initialize_with([] (netisr_handler& nh) {
+    nh.nh_proto = NETISR_NETLINK;
+    nh.nh_policy = NETISR_POLICY_SOURCE;
+    nh.nh_handler = netlink_input;
+    nh.nh_name = "netlink";
+});
+
+/*
+ * Per-socket filter handed to raw_input_ext(). Returns 1 when the socket
+ * should be skipped, which is the case only if the packet is for
+ * PF_NETLINK, the control block has a socket, and that socket's FIB
+ * number differs from the packet's. Returns 0 otherwise.
+ */
+static int
+raw_input_netlink_cb(struct mbuf *m, struct sockproto *proto, struct bsd_sockaddr *src, struct rawcb *rp)
+{
+    KASSERT(m != NULL, ("%s: m is NULL", __func__));
+    KASSERT(proto != NULL, ("%s: proto is NULL", __func__));
+    KASSERT(rp != NULL, ("%s: rp is NULL", __func__));
+
+    int pkt_fib = M_GETFIB(m);
+    bool skip = proto->sp_family == PF_NETLINK &&
+                rp->rcb_socket != NULL &&
+                rp->rcb_socket->so_fibnum != pkt_fib;
+
+    return skip ? (1) : (0);
+}
+
+/*
+ * netisr handler for NETISR_NETLINK: deliver m to the raw sockets of the
+ * PF_NETLINK family via raw_input_ext(), using raw_input_netlink_cb() as
+ * the per-socket filter and netlink_src as the source address.
+ */
+static void
+netlink_input(struct mbuf *m)
+{
+    /*
+     * Zero-initialize so that members other than sp_family are not handed
+     * to raw_input_ext() with indeterminate values.
+     */
+    struct sockproto netlink_proto = {};
+
+    netlink_proto.sp_family = PF_NETLINK;
+
+    raw_input_ext(m, &netlink_proto, &netlink_src, raw_input_netlink_cb);
+}
+
+/* One-time setup: initialize netlink_mtx and register the netisr handler. */
+void netlink_init(void)
+{
+    mutex_init(&netlink_mtx);
+    netisr_register(&netlink_nh);
+}
+
+SYSINIT(netlink, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, netlink_init, 0);
+
+/*
+ * It really doesn't make any sense at all for this code to share much
+ * with raw_usrreq.c, since its functionality is so restricted. XXX
+ */
+/* pru_abort: delegates to the generic raw-socket handler. */
+static void netlink_abort(struct socket *so)
+{
+    raw_usrreqs.pru_abort(so);
+}
+
+/* pru_close: delegates to the generic raw-socket handler. */
+static void netlink_close(struct socket *so)
+{
+    raw_usrreqs.pru_close(so);
+}
+
+/* pru_accept is EOPNOTSUPP */
+
+/*
+ * pru_attach: allocate a zeroed raw control block, hook it up to so, make
+ * the socket use netlink_mtx, and attach it through raw_attach(). On
+ * success the socket is marked connected and SO_USELOOPBACK is set.
+ * Returns ENOBUFS if the control block cannot be allocated, or the error
+ * from raw_attach() (after releasing the control block).
+ */
+static int
+netlink_attach(struct socket *so, int proto, struct thread *td)
+{
+    KASSERT(so->so_pcb == NULL, ("netlink_attach: so_pcb != NULL"));
+
+    /* XXX */
+    struct rawcb *pcb = (rawcb *)malloc(sizeof(*pcb));
+    if (pcb == NULL) {
+        return ENOBUFS;
+    }
+    bzero(pcb, sizeof(*pcb));
+
+    /*
+     * Block protocol notifications while the PCB exists but is not yet
+     * fully initialized (inherited from the routing-socket code).
+     */
+    int ipl = splnet();
+    so->so_pcb = (caddr_t)pcb;
+    so->set_mutex(&netlink_mtx);
+    so->so_fibnum = 0;
+    int rc = raw_attach(so, proto);
+    pcb = sotorawcb(so);
+    if (rc != 0) {
+        splx(ipl);
+        so->so_pcb = NULL;
+        free(pcb);
+        return rc;
+    }
+
+    NETLINK_LOCK();
+    soisconnected(so);
+    NETLINK_UNLOCK();
+    so->so_options |= SO_USELOOPBACK;
+    splx(ipl);
+    return 0;
+}
+
+/*
+ * pru_bind: accept an AF_NETLINK address whose sa_len matches
+ * struct bsd_sockaddr_nl (the nl_pid is not stored yet); reject a wrong
+ * length with EINVAL. Any other family is passed to the raw-socket bind
+ * handler.
+ */
+static int
+netlink_bind(struct socket *so, struct bsd_sockaddr *nam, struct thread *td)
+{
+    struct rawcb *rp = sotorawcb(so);
+
+    KASSERT(rp != NULL, ("netlink_bind: rp == NULL"));
+
+    if (nam->sa_family == AF_NETLINK) {
+        if (nam->sa_len != sizeof(struct bsd_sockaddr_nl)) {
+            /* Cast both lengths so the arguments really match "%d". */
+            bsd_log(ERR, "%s(%d) %s Invalid sockaddr_nl length %d expected %d\n",
+                __FILE__, __LINE__, __FUNCTION__, (int)nam->sa_len, (int)sizeof(struct bsd_sockaddr_nl));
+            return EINVAL;
+        }
+        // TODO: stash the nl_pid somewhere
+        return 0;
+    }
+    return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
+}
+
+/* pru_connect: delegates to the generic raw-socket handler. */
+static int netlink_connect(struct socket *so, struct bsd_sockaddr *nam, struct thread *td)
+{
+    int rc = raw_usrreqs.pru_connect(so, nam, td); /* XXX just EINVAL */
+    return rc;
+}
+
+/* pru_connect2 is EOPNOTSUPP */
+/* pru_control is EOPNOTSUPP */
+
+/* pru_detach: assert that a control block exists, then delegate to the raw-socket handler. */
+static void netlink_detach(struct socket *so)
+{
+    struct rawcb *pcb = sotorawcb(so);
+
+    KASSERT(pcb != NULL, ("netlink_detach: rp == NULL"));
+    raw_usrreqs.pru_detach(so);
+}
+
+/* pru_disconnect: delegates to the generic raw-socket handler. */
+static int netlink_disconnect(struct socket *so)
+{
+    int rc = raw_usrreqs.pru_disconnect(so);
+    return rc;
+}
+
+/* pru_listen is EOPNOTSUPP */
+
+/* pru_peeraddr: delegates to the generic raw-socket handler. */
+static int netlink_peeraddr(struct socket *so, struct bsd_sockaddr **nam)
+{
+    int rc = raw_usrreqs.pru_peeraddr(so, nam);
+    return rc;
+}
+
+/* pru_rcvd is EOPNOTSUPP */
+/* pru_rcvoob is EOPNOTSUPP */
+
+/* pru_send: delegates to the generic raw-socket handler with all arguments unchanged. */
+static int netlink_send(struct socket *so, int flags, struct mbuf *m,
+    struct bsd_sockaddr *nam, struct mbuf *control, struct thread *td)
+{
+    int rc = raw_usrreqs.pru_send(so, flags, m, nam, control, td);
+    return rc;
+}
+
+/* pru_sense is null */
+
+/* pru_shutdown: delegates to the generic raw-socket handler. */
+static int netlink_shutdown(struct socket *so)
+{
+    int rc = raw_usrreqs.pru_shutdown(so);
+    return rc;
+}
+
+/* pru_sockaddr: delegates to the generic raw-socket handler. */
+static int netlink_sockaddr(struct socket *so, struct bsd_sockaddr **nam)
+{
+    int rc = raw_usrreqs.pru_sockaddr(so, nam);
+    return rc;
+}
+
+/* User-request dispatch table for netlink sockets; entries not set here keep their defaults. */
+static struct pr_usrreqs netlink_usrreqs = initialize_with([] (pr_usrreqs& ops) {
+    /* Lifecycle */
+    ops.pru_attach = netlink_attach;
+    ops.pru_detach = netlink_detach;
+    ops.pru_abort = netlink_abort;
+    ops.pru_close = netlink_close;
+    ops.pru_shutdown = netlink_shutdown;
+    /* Addressing */
+    ops.pru_bind = netlink_bind;
+    ops.pru_connect = netlink_connect;
+    ops.pru_disconnect = netlink_disconnect;
+    ops.pru_peeraddr = netlink_peeraddr;
+    ops.pru_sockaddr = netlink_sockaddr;
+    /* Data */
+    ops.pru_send = netlink_send;
+});
+
+/* Queue the reply m on the NETISR_NETLINK netisr queue; so is not used. */
+static void netlink_dispatch(struct socket *so __bsd_unused2, struct mbuf *m)
+{
+    struct mbuf *reply = m;
+    netisr_queue(NETISR_NETLINK, reply);
+}
+
+/*
+ * Queue an NLMSG_ERROR reply for the request nlm (which may be NULL when
+ * the request header could not be parsed). The pid, sequence number and
+ * flags of the request are echoed, and a copy of the request header (or
+ * zeroes) is embedded in the error payload.
+ *
+ * error is a positive errno value; it is stored negated because the
+ * nlmsgerr 'error' field carries a negative errno (0 means ack).
+ * Returns ENOBUFS if the reply could not be allocated, otherwise 0.
+ */
+static int
+netlink_senderr(struct socket *so, struct nlmsghdr *nlm, int error)
+{
+    struct mbuf *m;
+    struct nlmsghdr *hdr;
+    struct nlmsgerr *err;
+
+    m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+    if (!m) {
+        return ENOBUFS;
+    }
+
+    if ((hdr = (struct nlmsghdr *)nlmsg_put(m,
+        nlm ? nlm->nlmsg_pid : 0,
+        nlm ? nlm->nlmsg_seq : 0,
+        NLMSG_ERROR, sizeof(*err),
+        nlm ? nlm->nlmsg_flags : 0)) == NULL) {
+        m_freem(m);
+        return ENOBUFS;
+    }
+    err = (struct nlmsgerr *) nlmsg_data(hdr);
+    /* Tolerate callers that already pass a negative value. */
+    err->error = error > 0 ? -error : error;
+    if (nlm) {
+        err->msg = *nlm;
+    } else {
+        memset(&err->msg, 0, sizeof(err->msg));
+    }
+
+    netlink_dispatch(so, m);
+    return 0;
+}
+
+/*
+ * Handle an RTM_GETLINK request: build one RTM_NEWLINK message per
+ * interface in V_ifnet (name, index and, when an AF_LINK address exists,
+ * the hardware and broadcast addresses), terminate the dump with
+ * NLMSG_DONE and queue the reply.
+ * Returns 0 on success or ENOBUFS when any part of the reply could not be
+ * allocated, in which case nothing is queued.
+ */
+static int
+netlink_process_getlink_msg(struct socket *so, struct nlmsghdr *nlm)
+{
+    struct ifnet *ifp = NULL;
+    struct bsd_ifaddr *ifa;
+    struct nlmsghdr *nlh;
+    struct ifinfomsg *ifm;
+    struct mbuf *m;
+    int error = 0;
+
+    m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+    if (!m) {
+        return ENOBUFS;
+    }
+
+    IFNET_RLOCK();
+    TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+        IF_ADDR_RLOCK(ifp);
+
+        nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWLINK, sizeof(*ifm), nlm->nlmsg_flags);
+        if (!nlh) {
+            error = ENOBUFS;
+            goto done;
+        }
+
+        ifm = (struct ifinfomsg *) nlmsg_data(nlh);
+        ifm->ifi_family = AF_UNSPEC;
+        ifm->__ifi_pad = 0;
+        ifm->ifi_type = ifp->if_data.ifi_type;
+        ifm->ifi_index = ifp->if_index;
+        ifm->ifi_flags = ifp->if_flags | ifp->if_drv_flags;
+        ifm->ifi_change = 0;
+        if (nla_put_string(m, IFLA_IFNAME, ifp->if_xname) ||
+            nla_put_type<uint32_t>(m, IFLA_LINK, ifp->if_index)) {
+            error = ENOBUFS;
+            goto done;
+        }
+        /* Add hw address info: the first AF_LINK address of the interface */
+        for (ifa = ifp->if_addr; ifa != NULL; ifa = TAILQ_NEXT(ifa, ifa_link)) {
+            if (ifa->ifa_addr != NULL && ifa->ifa_addr->sa_family == AF_LINK)
+                break;
+        }
+        if (ifa) {
+            if (nla_put_sockaddr(m, IFLA_ADDRESS, ifa->ifa_addr) ||
+                nla_put_sockaddr(m, IFLA_BROADCAST, ifa->ifa_broadaddr)) {
+                error = ENOBUFS;
+                goto done;
+            }
+        }
+
+        IF_ADDR_RUNLOCK(ifp);
+        nlmsg_end(m, nlh);
+    }
+    /* The loop ran to completion, so ifp is NULL and no address lock is held. */
+    if (nlmsg_put(m, nlm->nlmsg_pid, nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags) == NULL) {
+        error = ENOBUFS;
+    }
+
+done:
+    /* A non-NULL ifp means we jumped out of the loop with its lock held. */
+    if (ifp != NULL)
+        IF_ADDR_RUNLOCK(ifp);
+    IFNET_RUNLOCK();
+    if (!error) {
+        netlink_dispatch(so, m);
+    } else {
+        m_freem(m);
+    }
+    return (error);
+}
+
+/*
+ * Handle an RTM_GETADDR request: for every AF_INET / AF_INET6 address of
+ * every interface build one RTM_NEWADDR message (family, prefix length,
+ * interface index, label, address and broadcast address), terminate the
+ * dump with NLMSG_DONE and queue the reply.
+ *
+ * For IPv6 link-local addresses the scope id embedded in the address is
+ * extracted into ifa_scope and cleared from the copy that is sent.
+ * Returns 0 on success or ENOBUFS when any part of the reply could not be
+ * allocated, in which case nothing is queued.
+ */
+static int
+netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm)
+{
+    struct ifnet *ifp = NULL;
+    struct bsd_ifaddr *ifa;
+    struct nlmsghdr *nlh;
+    struct ifaddrmsg *ifm;
+    struct mbuf *m;
+    int error = 0;
+
+    m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+    if (!m) {
+        return ENOBUFS;
+    }
+
+    IFNET_RLOCK();
+    TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+        IF_ADDR_RLOCK(ifp);
+        for (ifa = ifp->if_addr; ifa != NULL; ifa = TAILQ_NEXT(ifa, ifa_link)) {
+            int af;
+
+            /* Must be checked before the family is read through it. */
+            if (!ifa->ifa_addr)
+                continue;
+
+            switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+            case AF_INET:
+                af = LINUX_AF_INET;
+                break;
+#endif
+#ifdef INET6
+            case AF_INET6:
+                af = LINUX_AF_INET6;
+                break;
+#endif
+            default:
+                af = -1;
+            }
+            if (af < 0)
+                continue;
+
+            /* Replies to a GETADDR dump are NEWADDR messages. */
+            nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWADDR, sizeof(*ifm), nlm->nlmsg_flags);
+            if (!nlh) {
+                error = ENOBUFS;
+                goto done;
+            }
+            ifm = (struct ifaddrmsg *) nlmsg_data(nlh);
+            ifm->ifa_index = ifp->if_index;
+            ifm->ifa_family = af;
+            ifm->ifa_prefixlen = get_sockaddr_mask_prefix_len(ifa->ifa_netmask);
+            ifm->ifa_flags = ifp->if_flags | ifp->if_drv_flags;
+            ifm->ifa_scope = 0; // FIXME:
+            if (nla_put_string(m, IFA_LABEL, ifp->if_xname)) {
+                error = ENOBUFS;
+                goto done;
+            }
+
+            struct bsd_sockaddr *p_addr = ifa->ifa_addr;
+            struct bsd_sockaddr *p_broadaddr = ifa->ifa_broadaddr;
+#ifdef INET6
+            /* Scratch copies; must outlive the nla_put_sockaddr() calls below. */
+            struct bsd_sockaddr_in6 addr, broadaddr;
+            if (p_addr->sa_family == AF_INET6) {
+                // FreeBSD embeds the IPv6 scope ID in the IPv6 address
+                // so need to extract and clear it before returning it.
+                if (IN6_IS_ADDR_LINKLOCAL(&((struct bsd_sockaddr_in6 *)p_addr)->sin6_addr)) {
+                    addr = *(struct bsd_sockaddr_in6 *)p_addr;
+                    ifm->ifa_scope = in6_getscope(&addr.sin6_addr);
+                    in6_clearscope(&addr.sin6_addr);
+                    p_addr = (struct bsd_sockaddr *)&addr;
+                }
+                if (p_broadaddr && IN6_IS_ADDR_LINKLOCAL(&((struct bsd_sockaddr_in6 *)p_broadaddr)->sin6_addr)) {
+                    broadaddr = *(struct bsd_sockaddr_in6 *)p_broadaddr;
+                    in6_clearscope(&broadaddr.sin6_addr);
+                    p_broadaddr = (struct bsd_sockaddr *)&broadaddr;
+                }
+            }
+#endif
+            if (nla_put_sockaddr(m, IFA_ADDRESS, p_addr) ||
+                nla_put_sockaddr(m, IFA_BROADCAST, p_broadaddr)) {
+                error = ENOBUFS;
+                goto done;
+            }
+            nlmsg_end(m, nlh);
+        }
+
+        IF_ADDR_RUNLOCK(ifp);
+    }
+    /* The loop ran to completion, so ifp is NULL and no address lock is held. */
+    if (nlmsg_put(m, nlm->nlmsg_pid, nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags) == NULL) {
+        error = ENOBUFS;
+    }
+done:
+    /* A non-NULL ifp means we jumped out of the loop with its lock held. */
+    if (ifp != NULL)
+        IF_ADDR_RUNLOCK(ifp);
+    IFNET_RUNLOCK();
+    if (!error) {
+        netlink_dispatch(so, m);
+    } else {
+        m_freem(m);
+    }
+    return (error);
+}
+
+/*
+ * Map a link-layer entry state to a NUD_* value for the given address
+ * family. AF_INET entries are always reported as NUD_REACHABLE; AF_INET6
+ * entries (when compiled in) are mapped from their ND6_LLINFO_* state,
+ * with unknown states giving 0. Any other family gives 0.
+ */
+static uint16_t lle_state_to_ndm_state(int family, int state)
+{
+    if (family == AF_INET) {
+        return NUD_REACHABLE;
+    }
+#ifdef INET6
+    if (family == AF_INET6) {
+        uint16_t nud = 0;
+
+        switch (state) {
+        case ND6_LLINFO_INCOMPLETE: nud = NUD_INCOMPLETE; break;
+        case ND6_LLINFO_REACHABLE:  nud = NUD_REACHABLE;  break;
+        case ND6_LLINFO_STALE:      nud = NUD_STALE;      break;
+        case ND6_LLINFO_DELAY:      nud = NUD_DELAY;      break;
+        case ND6_LLINFO_PROBE:      nud = NUD_PROBE;      break;
+        case ND6_LLINFO_NOSTATE:
+        default:
+            break;
+        }
+        return nud;
+    }
+#endif
+    return 0;
+}
+
+/*
+ * Translate a BSD address family into the Linux numbering. Only AF_INET
+ * and (when compiled in) AF_INET6 are known; everything else gives -1.
+ */
+static int netlink_bsd_to_linux_family(int family)
+{
+    if (family == AF_INET) {
+        return LINUX_AF_INET;
+    }
+#ifdef INET6
+    if (family == AF_INET6) {
+        return LINUX_AF_INET6;
+    }
+#endif
+    return -1;
+}
+
+struct netlink_getneigh_lle_cbdata { /* state shared by the RTM_GETNEIGH table/entry callbacks */
+ struct nlmsghdr *nlm; /* request header; its pid, seq and flags are echoed in each reply */
+ struct mbuf *m; /* reply chain the callbacks append to */
+ uint16_t family; /* Linux family to report; 0 means no family filter */
+ uint16_t state; /* NUD_* mask to report; 0 means no state filter */
+};
+
+/*
+ * Per-entry callback for the RTM_GETNEIGH dump: append one RTM_NEWNEIGH
+ * message describing lle (family, interface index, NUD state, router
+ * flag, destination address and link-level address) to the reply chain
+ * in the callback data.
+ * Entries whose family has no Linux equivalent, or which do not match the
+ * requested family / state filters, are skipped.
+ * Returns 0 to continue the iteration or ENOBUFS on allocation failure.
+ */
+static int
+netlink_getneigh_lle_cb(struct lltable *llt, struct llentry *lle, void *data)
+{
+    struct netlink_getneigh_lle_cbdata *cbdata = (struct netlink_getneigh_lle_cbdata *) data;
+    int ndm_family = netlink_bsd_to_linux_family(llt->llt_af);
+    int ndm_state = lle_state_to_ndm_state(llt->llt_af, lle->ln_state);
+
+    /* No Linux family number for this table: nothing sensible to report. */
+    if (ndm_family < 0)
+        return 0;
+
+    if (cbdata->family && cbdata->family != ndm_family)
+        return 0;
+
+    if (cbdata->state && !(cbdata->state & ndm_state))
+        return 0;
+
+    struct nlmsghdr *nlm = cbdata->nlm;
+    struct mbuf *m = cbdata->m;
+    struct ndmsg *ndm;
+    /* Replies to a GETNEIGH dump are NEWNEIGH messages. */
+    struct nlmsghdr *nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWNEIGH, sizeof(*ndm), nlm->nlmsg_flags);
+
+    if (!nlh) {
+        return ENOBUFS;
+    }
+
+    ndm = (struct ndmsg *) nlmsg_data(nlh);
+    ndm->ndm_family = ndm_family;
+    ndm->ndm_pad1 = 0;
+    ndm->ndm_pad2 = 0;
+    ndm->ndm_ifindex = llt->llt_ifp->if_index;
+    ndm->ndm_state = ndm_state;
+    ndm->ndm_flags = 0;
+    if (lle->ln_router)
+        ndm->ndm_flags |= NTF_ROUTER;
+    ndm->ndm_type = 0;
+
+    struct bsd_sockaddr *sa = L3_ADDR(lle);
+    if (sa->sa_family == AF_INET) {
+        struct bsd_sockaddr_in *sa4 = (struct bsd_sockaddr_in *) sa;
+        if (nla_put_type(m, NDA_DST, sa4->sin_addr)) {
+            return ENOBUFS;
+        }
+    }
+#ifdef INET6
+    else if (sa->sa_family == AF_INET6) {
+        /* Work on a copy so the embedded scope id can be cleared. */
+        struct bsd_sockaddr_in6 sa6 = *(struct bsd_sockaddr_in6 *) sa;
+        if (IN6_IS_ADDR_LINKLOCAL(&sa6.sin6_addr)) {
+            in6_clearscope(&sa6.sin6_addr);
+        }
+        if (nla_put_type(m, NDA_DST, sa6.sin6_addr)) {
+            return ENOBUFS;
+        }
+    }
+#endif
+
+    /* 6 = length in bytes of the link-level address copied from mac16 */
+    if (nla_put(m, NDA_LLADDR, 6, lle->ll_addr.mac16)) {
+        return ENOBUFS;
+    }
+
+    nlmsg_end(m, nlh);
+
+    return 0;
+}
+
+
+/*
+ * Per-table callback for the RTM_GETNEIGH dump. Tables that do not match
+ * the requested family, or that belong to a loopback interface, are
+ * skipped; otherwise every entry is visited with netlink_getneigh_lle_cb()
+ * while holding the interface's AF data read lock.
+ */
+static int
+netlink_getneigh_lltable_cb(struct lltable *llt, void *cbdata)
+{
+    struct netlink_getneigh_lle_cbdata *filter = (struct netlink_getneigh_lle_cbdata *) cbdata;
+
+    bool wrong_family = filter->family &&
+                        filter->family != netlink_bsd_to_linux_family(llt->llt_af);
+    if (wrong_family || (llt->llt_ifp->if_flags & IFF_LOOPBACK)) {
+        return 0;
+    }
+
+    IF_AFDATA_RLOCK(llt->llt_ifp);
+    int rc = lltable_foreach_lle(llt, netlink_getneigh_lle_cb, filter);
+    IF_AFDATA_RUNLOCK(llt->llt_ifp);
+
+    return rc;
+}
+
+/*
+ * Handle an RTM_GETNEIGH request: dump the entries of all link-layer
+ * tables as neighbour messages, terminate with NLMSG_DONE and queue the
+ * reply.
+ *
+ * The family / state filters are read from the request only as far as
+ * its payload actually extends: a full struct ndmsg supplies both, a
+ * shorter payload supplies just the leading family byte, and an empty
+ * payload means "no filter".
+ * Returns 0 on success, EINVAL for a request shorter than a netlink
+ * header, ENOBUFS on allocation failure, or the error reported by the
+ * table iteration. Nothing is queued on error.
+ */
+static int
+netlink_process_getneigh_msg(struct socket *so, struct nlmsghdr *nlm)
+{
+    struct netlink_getneigh_lle_cbdata cbdata;
+    struct mbuf *m;
+    int error;
+
+    if (nlm->nlmsg_len < (uint32_t)NLMSG_HDRLEN) {
+        return EINVAL;
+    }
+    size_t payload = nlm->nlmsg_len - NLMSG_HDRLEN;
+
+    cbdata.nlm = nlm;
+    cbdata.family = 0;
+    cbdata.state = 0;
+    if (payload >= sizeof(struct ndmsg)) {
+        struct ndmsg *ndm = (struct ndmsg *) nlmsg_data(nlm);
+        cbdata.family = ndm->ndm_family;
+        cbdata.state = ndm->ndm_state;
+    } else if (payload >= sizeof(uint8_t)) {
+        /* Short request: only the leading family byte is present. */
+        cbdata.family = *(uint8_t *) nlmsg_data(nlm);
+    }
+
+    m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+    if (!m) {
+        return ENOBUFS;
+    }
+    cbdata.m = m;
+
+    error = lltable_foreach(netlink_getneigh_lltable_cb, &cbdata);
+
+    if (!error &&
+        nlmsg_put(m, nlm->nlmsg_pid, nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags) == NULL) {
+        error = ENOBUFS;
+    }
+
+    if (!error) {
+        netlink_dispatch(so, m);
+    } else {
+        /* Free the whole chain: nl_m_put() may have appended mbufs. */
+        m_freem(m);
+    }
+
+    return error;
+}
+
+/*
+ * Validate one request packet and dispatch it by nlmsg_type to the
+ * RTM_GETLINK / RTM_GETADDR / RTM_GETNEIGH handlers. The packet must carry
+ * a packet header, be at least one netlink header long, and its length
+ * must equal the header's nlmsg_len. On any error an NLMSG_ERROR reply is
+ * queued (with a NULL request header if validation failed before the
+ * header was accepted). The request mbuf is always freed.
+ * Returns 0 or the errno value of the failure.
+ */
+static int
+netlink_process_msg(struct mbuf *m, struct socket *so)
+{
+    struct nlmsghdr *nlm = NULL;
+    int len, error = 0;
+
+#define senderr(e) { error = e; goto flush;}
+    if (m == NULL || !(m->m_hdr.mh_flags & M_PKTHDR)) {
+        panic("Invalid message");
+    }
+
+    len = m->M_dat.MH.MH_pkthdr.len;
+    if (len < sizeof(struct nlmsghdr)) {
+        senderr(EINVAL);
+    }
+
+    /* Make the whole request contiguous; m is NULL if this fails. */
+    m = m_pullup(m, len);
+    if (m == NULL) {
+        senderr(ENOBUFS);
+    }
+
+    /* nlm stays NULL until the length has been verified. */
+    if (len != mtod(m, struct nlmsghdr *)->nlmsg_len) {
+        senderr(EINVAL);
+    }
+    nlm = mtod(m, struct nlmsghdr *);
+
+    switch (nlm->nlmsg_type) {
+    case LINUX_RTM_GETLINK:
+        error = netlink_process_getlink_msg(so, nlm);
+        break;
+    case LINUX_RTM_GETADDR:
+        error = netlink_process_getaddr_msg(so, nlm);
+        break;
+    case LINUX_RTM_GETNEIGH:
+        error = netlink_process_getneigh_msg(so, nlm);
+        break;
+    default:
+        error = EOPNOTSUPP;
+        break;
+    }
+
+flush:
+    if (error) {
+        netlink_senderr(so, nlm, error);
+    }
+    if (m) {
+        m_freem(m);
+    }
+
+    return (error);
+}
+
+/* pr_output hook: every outgoing packet is a request, so process it right away. */
+static int netlink_output(struct mbuf *m, struct socket *so)
+{
+    int rc = netlink_process_msg(m, so);
+    return rc;
+}
+
+/*
+ * Definitions of protocols supported in the NETLINK domain.
+ */
+
+extern struct domain netlinkdomain; /* or at least forward */
+
+static struct protosw netlinksw[] = { /* one entry per supported socket type; the two entries differ only in pr_type */
+ initialize_with([] (protosw& x) { /* SOCK_RAW */
+ x.pr_type = SOCK_RAW;
+ x.pr_domain = &netlinkdomain;
+ x.pr_flags = PR_ATOMIC|PR_ADDR;
+ x.pr_output = netlink_output; /* requests are processed on output */
+ x.pr_ctlinput = raw_ctlinput;
+ x.pr_init = raw_init;
+ x.pr_usrreqs = &netlink_usrreqs;
+ }),
+ initialize_with([] (protosw& x) { /* SOCK_DGRAM */
+ x.pr_type = SOCK_DGRAM;
+ x.pr_domain = &netlinkdomain;
+ x.pr_flags = PR_ATOMIC|PR_ADDR;
+ x.pr_output = netlink_output; /* requests are processed on output */
+ x.pr_ctlinput = raw_ctlinput;
+ x.pr_init = raw_init;
+ x.pr_usrreqs = &netlink_usrreqs;
+ }),
+};
+
+/* Domain descriptor for PF_NETLINK; its protocol switch spans all of netlinksw[]. */
+struct domain netlinkdomain = initialize_with([] (domain& dom) {
+    dom.dom_name = "netlink";
+    dom.dom_family = PF_NETLINK;
+    dom.dom_protosw = netlinksw;
+    /* One past the last element of netlinksw[] */
+    dom.dom_protoswNPROTOSW = netlinksw + sizeof(netlinksw) / sizeof(netlinksw[0]);
+});
+
+VNET_DOMAIN_SET(netlink);
diff --git a/bsd/sys/compat/linux/linux_netlink.h b/bsd/sys/compat/linux/linux_netlink.h
new file mode 100644
index 00000000..70a25903
--- /dev/null
+++ b/bsd/sys/compat/linux/linux_netlink.h
@@ -0,0 +1,175 @@
+#ifndef _NETLINK_H_
+#define _NETLINK_H_
+
+#include <sys/cdefs.h>
+
+struct nlmsghdr {
+ uint32_t nlmsg_len; /* Length of message including header */
+ uint16_t nlmsg_type; /* Type of message content */
+ uint16_t nlmsg_flags; /* Additional flags */
+ uint32_t nlmsg_seq; /* Sequence number */
+ uint32_t nlmsg_pid; /* Sender port ID */
+};
+
+struct nlmsgerr {
+ int error; /* Negative errno or 0 for ack */
+ struct nlmsghdr msg; /* Message that caused the error */
+};
+
+
+#define NLMSG_ALIGNTO 4U
+#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
+#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
+#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN)
+#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len))
+#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0)))
+#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \
+ (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len)))
+#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \
+ (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \
+ (nlh)->nlmsg_len <= (len))
+#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len)))
+
+#define NLMSG_NOOP 0x1 /* Nothing. */
+#define NLMSG_ERROR 0x2 /* Error */
+#define NLMSG_DONE 0x3 /* End of a dump */
+#define NLMSG_OVERRUN 0x4 /* Data lost */
+
+
+/* Unaligned size of a message: aligned header plus payload bytes. */
+static inline int nlmsg_msg_size(int payload) {
+    int total = payload + NLMSG_HDRLEN;
+    return total;
+}
+
+/* Pointer to the payload, which starts NLMSG_HDRLEN bytes after the header. */
+static inline void *nlmsg_data(const struct nlmsghdr *nlh) {
+    unsigned char *base = (unsigned char *) nlh;
+    return base + NLMSG_HDRLEN;
+}
+
+
+struct nlattr {
+ uint16_t nla_len;
+ uint16_t nla_type;
+};
+
+
+#define NLA_F_NESTED (1 << 15)
+#define NLA_F_NET_BYTEORDER (1 << 14)
+#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER)
+
+#define NLA_ALIGNTO 4
+#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
+#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr)))
+
+/* Unaligned size of an attribute: aligned attribute header plus payload bytes. */
+static inline int nla_attr_size(int payload)
+{
+    int total = payload + NLA_HDRLEN;
+    return total;
+}
+
+/* Size of an attribute including the padding up to the NLA_ALIGNTO boundary. */
+static inline int nla_total_size(int payload)
+{
+    int unpadded = nla_attr_size(payload);
+    return NLA_ALIGN(unpadded);
+}
+
+/* Pointer to the attribute payload, which starts NLA_HDRLEN bytes after the header. */
+static inline void *nla_data(const struct nlattr *nla)
+{
+    char *base = (char *) nla;
+    return base + NLA_HDRLEN;
+}
+
+#define LINUX_RTM_NEWLINK 16
+#define LINUX_RTM_DELLINK 17
+#define LINUX_RTM_GETLINK 18
+#define LINUX_RTM_SETLINK 19
+#define LINUX_RTM_NEWADDR 20
+#define LINUX_RTM_DELADDR 21
+#define LINUX_RTM_GETADDR 22
+#define LINUX_RTM_NEWNEIGH 28
+#define LINUX_RTM_DELNEIGH 29
+#define LINUX_RTM_GETNEIGH 30
+
+struct ifinfomsg {
+ unsigned char ifi_family;
+ unsigned char __ifi_pad;
+ unsigned short ifi_type; /* ARPHRD_* */
+ int ifi_index; /* Link index */
+ unsigned ifi_flags; /* IFF_* flags */
+ unsigned ifi_change; /* IFF_* change mask */
+};
+
+#define IFLA_UNSPEC 0
+#define IFLA_ADDRESS 1
+#define IFLA_BROADCAST 2
+#define IFLA_IFNAME 3
+#define IFLA_MTU 4
+#define IFLA_LINK 5
+
+struct ifaddrmsg {
+ uint8_t ifa_family;
+ uint8_t ifa_prefixlen; /* The prefix length */
+ uint8_t ifa_flags; /* Flags */
+ uint8_t ifa_scope; /* Address scope */
+ uint32_t ifa_index; /* Link index */
+};
+
+#define IFA_UNSPEC 0
+#define IFA_ADDRESS 1
+#define IFA_LOCAL 2
+#define IFA_LABEL 3
+#define IFA_BROADCAST 4
+#define IFA_ANYCAST 5
+#define IFA_CACHEINFO 6
+#define IFA_MULTICAST 7
+#define IFA_FLAGS 8
+
+/* ifa_flags */
+#define IFA_F_SECONDARY 0x01
+#define IFA_F_TEMPORARY IFA_F_SECONDARY
+#define IFA_F_NODAD 0x02
+#define IFA_F_OPTIMISTIC 0x04
+#define IFA_F_DADFAILED 0x08
+#define IFA_F_HOMEADDRESS 0x10
+#define IFA_F_DEPRECATED 0x20
+#define IFA_F_TENTATIVE 0x40
+#define IFA_F_PERMANENT 0x80
+#define IFA_F_MANAGETEMPADDR 0x100
+#define IFA_F_NOPREFIXROUTE 0x200
+#define IFA_F_MCAUTOJOIN 0x400
+#define IFA_F_STABLE_PRIVACY 0x800
+
+struct ndmsg { /* payload of neighbour (RTM_*NEIGH) messages */
+ uint8_t ndm_family; /* address family (Linux numbering in replies) */
+ uint8_t ndm_pad1; /* padding */
+ uint16_t ndm_pad2; /* padding */
+ int32_t ndm_ifindex; /* interface index */
+ uint16_t ndm_state; /* NUD_* state bits */
+ uint8_t ndm_flags; /* NTF_* flags */
+ uint8_t ndm_type;
+};
+
+#define NDA_UNSPEC 0x0
+#define NDA_DST 0x01
+#define NDA_LLADDR 0x02
+#define NDA_CACHEINFO 0x03
+
+#define NTF_USE 0x01
+#define NTF_SELF 0x02
+#define NTF_MASTER 0x04
+#define NTF_PROXY 0x08
+#define NTF_EXT_LEARNED 0x10
+#define NTF_OFFLOADED 0x20
+#define NTF_ROUTER 0x80
+
+#define NUD_INCOMPLETE 0x01
+#define NUD_REACHABLE 0x02
+#define NUD_STALE 0x04
+#define NUD_DELAY 0x08
+#define NUD_PROBE 0x10
+#define NUD_FAILED 0x20
+
+/* Domain ID for supporting NETLINK socket on FreeBSD (actually 16 on Linux) */
+#define AF_NETLINK AF_VENDOR00
+#define PF_NETLINK AF_NETLINK
+
+__BEGIN_DECLS
+void netlink_init(void);
+__END_DECLS
+
+#endif /* _NETLINK_H_ */
diff --git a/bsd/sys/compat/linux/linux_socket.cc b/bsd/sys/compat/linux/linux_socket.cc
index cee3993b..b2addf38 100644
--- a/bsd/sys/compat/linux/linux_socket.cc
+++ b/bsd/sys/compat/linux/linux_socket.cc
@@ -57,6 +57,7 @@

#include <bsd/sys/compat/linux/linux.h>
#include <bsd/sys/compat/linux/linux_socket.h>
+#include <bsd/sys/compat/linux/linux_netlink.h>
#include <osv/stubbing.hh>

#define __NEED_sa_family_t
@@ -257,6 +258,8 @@ linux_to_bsd_domain(int domain)
return (AF_IPX);
case LINUX_AF_APPLETALK:
return (AF_APPLETALK);
+ case LINUX_AF_NETLINK:
+ return (AF_NETLINK);
}
return (-1);
}
@@ -280,6 +283,8 @@ bsd_to_linux_domain(int domain)
return (LINUX_AF_IPX);
case AF_APPLETALK:
return (LINUX_AF_APPLETALK);
+ case AF_NETLINK:
+ return (LINUX_AF_NETLINK);
}
return (-1);
}
diff --git a/bsd/sys/compat/linux/linux_socket.h b/bsd/sys/compat/linux/linux_socket.h
index 6afa53e5..61f8c716 100644
--- a/bsd/sys/compat/linux/linux_socket.h
+++ b/bsd/sys/compat/linux/linux_socket.h
@@ -92,6 +92,7 @@
#define LINUX_AF_IPX 4
#define LINUX_AF_APPLETALK 5
#define LINUX_AF_INET6 10
+#define LINUX_AF_NETLINK 16

/* Supported socket types */

diff --git a/bsd/sys/net/if_llatbl.cc b/bsd/sys/net/if_llatbl.cc
index 869c5524..b1b5112a 100644
--- a/bsd/sys/net/if_llatbl.cc
+++ b/bsd/sys/net/if_llatbl.cc
@@ -40,9 +40,9 @@
#include <bsd/sys/net/route.h>
#include <bsd/sys/net/vnet.h>
#include <bsd/sys/netinet/if_ether.h>
-#if 0
-#include <netinet6/in6_var.h>
-#include <netinet6/nd6.h>
+#ifdef INET6
+#include <bsd/sys/netinet6/in6_var.h>
+#include <bsd/sys/netinet6/nd6.h>
#endif

MALLOC_DEFINE(M_LLTABLE, "lltable", "link level address tables");
@@ -497,3 +497,43 @@ DB_SHOW_ALL_COMMAND(lltables, db_show_all_lltables)
}
}
#endif
+
+/*
+ * Iterate over all lltables while holding LLTABLE_RLOCK; the walk stops at the first non-zero callback result, which is returned (0 otherwise)
+ */
+int lltable_foreach(int (*func)(struct lltable *llt, void *cbdata), void *cbdata)
+{
+ struct lltable *llt;
+ int error = 0; /* stays 0 when the list is empty or every callback returns 0 */
+
+ LLTABLE_RLOCK();
+ SLIST_FOREACH(llt, &V_lltables, llt_link) {
+ if ((error = func(llt, cbdata)) != 0)
+ break; /* keep the callback's result as the return value */
+ }
+ LLTABLE_RUNLOCK();
+
+ return error;
+}
+
+/*
+ * Iterate over all llentries in the lltable, skipping LLE_DELETED ones; the walk stops at the first non-zero callback result, which is returned (0 otherwise)
+ */
+int lltable_foreach_lle(struct lltable *llt, int (*func)(struct lltable *llt, struct llentry *lle, void *cbdata), void *cbdata)
+{
+ struct llentry *lle;
+ int i;
+ int error = 0;
+ /* the inner break leaves only one bucket, so the bucket loop must test error as well */
+ for (i = 0; i < LLTBL_HASHTBL_SIZE && error == 0; i++) {
+ LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
+ /* skip deleted entries */
+ if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
+ continue;
+ if ((error = func(llt, lle, cbdata)) != 0)
+ break;
+ }
+ }
+
+ return error;
+}
diff --git a/bsd/sys/net/if_llatbl.h b/bsd/sys/net/if_llatbl.h
index 1cac880a..6e0985ec 100644
--- a/bsd/sys/net/if_llatbl.h
+++ b/bsd/sys/net/if_llatbl.h
@@ -197,6 +197,17 @@ int lltable_sysctl_dumparp(int, struct sysctl_req *);
size_t llentry_free(struct llentry *);
struct llentry *llentry_alloc(struct ifnet *, struct lltable *,
struct bsd_sockaddr_storage *);
+
+/*
+ * Iterate over all lltables
+ */
+int lltable_foreach(int (*func)(struct lltable *llt, void *cbdata), void *cbdata);
+
+/*
+ * Iterate over all llentries in the lltable
+ */
+int lltable_foreach_lle(struct lltable *llt, int (*func)(struct lltable *llt, struct llentry *lle, void *cbdata), void *cbdata);
+
__END_DECLS

/*
@@ -216,4 +227,6 @@ lla_lookup_fast(struct lltable *llt, u_int flags, const struct bsd_sockaddr *l3a
}

int lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *);
+
+
#endif /* _NET_IF_LLATBL_H_ */
diff --git a/bsd/sys/net/netisr.h b/bsd/sys/net/netisr.h
index 7dc4ab1b..932e470b 100644
--- a/bsd/sys/net/netisr.h
+++ b/bsd/sys/net/netisr.h
@@ -61,6 +61,7 @@
#define NETISR_IPV6 10
#define NETISR_NATM 11
#define NETISR_EPAIR 12 /* if_epair(4) */
+#define NETISR_NETLINK 13 /* Linux NETLINK */

/*
* Protocol ordering and affinity policy constants. See the detailed
--
2.35.1

Waldemar Kozaczuk

unread,
Jun 3, 2022, 9:28:48 PM6/3/22
to osv...@googlegroups.com, Waldemar Kozaczuk
This patch fixes a minor bug in handling RTM_GETADDR and RTM_GETNEIGH
requests. It tweaks the relevant code to set the RTM_NEWADDR and RTM_NEWNEIGH
type for the responses respectively.

This is important as for example Golang runtime tests the nlmsg_type of
the netlink response and breaks if it is wrong.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>
---
bsd/sys/compat/linux/linux_netlink.cc | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
index bc02bb7f..ea0cf609 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -588,7 +588,7 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm)
if (!ifa->ifa_addr)
continue;

- nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_GETADDR, sizeof(*ifm), nlm->nlmsg_flags);
+ nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWADDR, sizeof(*ifm), nlm->nlmsg_flags);
if (!nlh) {
error = ENOBUFS;
goto done;
@@ -720,7 +720,7 @@ netlink_getneigh_lle_cb(struct lltable *llt, struct llentry *lle, void *data)
struct nlmsghdr *nlm = cbdata->nlm;
struct mbuf *m = cbdata->m;
struct ndmsg *ndm;
- struct nlmsghdr *nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_GETNEIGH, sizeof(*ndm), nlm->nlmsg_flags);
+ struct nlmsghdr *nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWNEIGH, sizeof(*ndm), nlm->nlmsg_flags);

if (!nlh) {
return ENOBUFS;
@@ -753,7 +753,7 @@ netlink_getneigh_lle_cb(struct lltable *llt, struct llentry *lle, void *data)
}
}
#endif
-
+
if (nla_put(m, NDA_LLADDR, 6, lle->ll_addr.mac16)) {
return ENOBUFS;
}
@@ -875,29 +875,29 @@ extern struct domain netlinkdomain; /* or at least forward */

static struct protosw netlinksw[] = {
initialize_with([] (protosw& x) {
- x.pr_type = SOCK_RAW;
+ x.pr_type = SOCK_RAW;
x.pr_domain = &netlinkdomain;
x.pr_flags = PR_ATOMIC|PR_ADDR;
x.pr_output = netlink_output;
x.pr_ctlinput = raw_ctlinput;
- x.pr_init = raw_init;
+ x.pr_init = raw_init;
x.pr_usrreqs = &netlink_usrreqs;
}),
initialize_with([] (protosw& x) {
- x.pr_type = SOCK_DGRAM;
+ x.pr_type = SOCK_DGRAM;
x.pr_domain = &netlinkdomain;
x.pr_flags = PR_ATOMIC|PR_ADDR;
x.pr_output = netlink_output;
x.pr_ctlinput = raw_ctlinput;
- x.pr_init = raw_init;
+ x.pr_init = raw_init;
x.pr_usrreqs = &netlink_usrreqs;
}),
};

struct domain netlinkdomain = initialize_with([] (domain& x) {
- x.dom_family = PF_NETLINK;
- x.dom_name = "netlink";
- x.dom_protosw = netlinksw;
+ x.dom_family = PF_NETLINK;
+ x.dom_name = "netlink";
+ x.dom_protosw = netlinksw;
x.dom_protoswNPROTOSW = &netlinksw[sizeof(netlinksw)/sizeof(netlinksw[0])];
});

--
2.35.1

Waldemar Kozaczuk

unread,
Jun 3, 2022, 9:28:50 PM6/3/22
to osv...@googlegroups.com, Waldemar Kozaczuk
Golang uses the netlink interface RTM_GETADDR to query the network interfaces and IPs.
It assumes that the 1st attribute in the RTM_NEWADDR response is IFA_ADDRESS. This
patch changes the order in which RTM_NEWADDR attributes are sent to make
sure the IFA_ADDRESS goes first and IFA_LABEL last.

This does not seem to be documented anywhere but Linux sends RTM_NEWADDR responses
with the IFA_ADDRESS attribute first so we follow suit.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>
---
bsd/sys/compat/linux/linux_netlink.cc | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
index ea0cf609..4208ce7f 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -599,10 +599,6 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm)
ifm->ifa_prefixlen = get_sockaddr_mask_prefix_len(ifa->ifa_netmask);
ifm->ifa_flags = ifp->if_flags | ifp->if_drv_flags;
ifm->ifa_scope = 0; // FIXME:
- if (nla_put_string(m, IFA_LABEL, ifp->if_xname)) {
- error = ENOBUFS;
- goto done;
- }
#ifdef INET6
if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6){
// FreeBSD embeds the IPv6 scope ID in the IPv6 address
@@ -635,6 +631,10 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm)
goto done;
}
}
+ if (nla_put_string(m, IFA_LABEL, ifp->if_xname)) {
+ error = ENOBUFS;
+ goto done;
+ }
nlmsg_end(m, nlh);
}

--
2.35.1

Waldemar Kozaczuk

unread,
Jun 3, 2022, 9:28:52 PM6/3/22
to osv...@googlegroups.com, Waldemar Kozaczuk
This enhances netlink_attach() to allocate a netlink control block and
netlink_bind() to capture or generate (if 0) the source nl_pid and save it
in that control block so that it can be fetched later when necessary. This
will be useful in the next patch.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>
---
bsd/sys/compat/linux/linux_netlink.cc | 25 +++++++++++++++++++++----
1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
index 7e743db8..fcdab06b 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -61,6 +61,14 @@ struct bsd_sockaddr_nl {
uint32_t nl_groups; /* Multicast groups mask */
};

+struct netlinkcb {
+ struct rawcb raw;
+ pid_t nl_pid;
+};
+
+std::atomic<pid_t> _nl_next_gen_pid(2);
+
+
MALLOC_DEFINE(M_NETLINK, "netlink", "netlink socket");

static struct bsd_sockaddr netlink_src = { 2, PF_NETLINK, };
@@ -311,16 +319,18 @@ netlink_close(struct socket *so)
static int
netlink_attach(struct socket *so, int proto, struct thread *td)
{
+ struct netlinkcb *ncb;
struct rawcb *rp;
int s, error;

KASSERT(so->so_pcb == NULL, ("netlink_attach: so_pcb != NULL"));

/* XXX */
- rp = (rawcb *)malloc(sizeof *rp);
- if (rp == NULL)
+ ncb = (netlinkcb *)malloc(sizeof *ncb);
+ if (ncb == NULL)
return ENOBUFS;
- bzero(rp, sizeof *rp);
+ bzero(ncb, sizeof *ncb);
+ rp = &ncb->raw;

/*
* The splnet() is necessary to block protocols from sending
@@ -362,7 +372,14 @@ netlink_bind(struct socket *so, struct bsd_sockaddr *nam, struct thread *td)
__FILE__, __LINE__, __FUNCTION__, nam->sa_len, sizeof(struct bsd_sockaddr_nl));
return EINVAL;
}
- // TODO: stash the nl_pid somewhere
+ auto *ncb = reinterpret_cast<netlinkcb*>(rp);
+ bsd_sockaddr_nl *nl_sock_addr = (bsd_sockaddr_nl*)nam;
+ if (nl_sock_addr->nl_pid == 0) { // kernel needs to assign pid
+ auto assigned_pid = _nl_next_gen_pid.fetch_add(1, std::memory_order_relaxed);
+ ncb->nl_pid = assigned_pid;
+ } else {
+ ncb->nl_pid = nl_sock_addr->nl_pid;
+ }
return 0;
}
return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
--
2.35.1

Waldemar Kozaczuk

unread,
Jun 3, 2022, 9:28:52 PM6/3/22
to osv...@googlegroups.com, Waldemar Kozaczuk
This is a minor adjustment to make OSv implementation match what Linux
does - skip IFA_BROADCAST attributes for loopback address in NEWADDR
response.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>
---
bsd/sys/compat/linux/linux_netlink.cc | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
index 4208ce7f..7e743db8 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -616,8 +616,11 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm)
in6_clearscope(&broadaddr.sin6_addr);
p_broadaddr = (struct bsd_sockaddr *)&broadaddr;
}
- if (nla_put_sockaddr(m, IFA_ADDRESS, p_addr) ||
- nla_put_sockaddr(m, IFA_BROADCAST, p_broadaddr)){
+ if (nla_put_sockaddr(m, IFA_ADDRESS, p_addr)){
+ error = ENOBUFS;
+ goto done;
+ }
+ if (!(ifm->ifa_flags & IFF_LOOPBACK) && nla_put_sockaddr(m, IFA_BROADCAST, p_broadaddr)){
error = ENOBUFS;
goto done;
}
@@ -625,8 +628,11 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm)
else
#endif
{
- if (nla_put_sockaddr(m, IFA_ADDRESS, ifa->ifa_addr) ||
- nla_put_sockaddr(m, IFA_BROADCAST, ifa->ifa_broadaddr)){
+ if (nla_put_sockaddr(m, IFA_ADDRESS, ifa->ifa_addr)){
+ error = ENOBUFS;
+ goto done;
+ }
+ if (!(ifm->ifa_flags & IFF_LOOPBACK) && nla_put_sockaddr(m, IFA_BROADCAST, ifa->ifa_broadaddr)){
error = ENOBUFS;
goto done;
}
--
2.35.1

Waldemar Kozaczuk

unread,
Jun 3, 2022, 9:28:54 PM6/3/22
to osv...@googlegroups.com, Waldemar Kozaczuk
There are three types of pid used in netlink interface:
- the nl_pid on the source (app) side (part of sockaddr_nl) set before
bind(); could be 0 to request kernel generating new one
- the nl_pid on the destination (kernel) side set into dst_addr that
always needs to be 0 if we communicate with kernel
- the nlmsg_pid (sender port ID) that is part of the netlink message
header sent to and received from kernel

Some relevant information from Linux docs:

" nlmsg_seq and nlmsg_pid are used to track messages. nlmsg_pid
shows the origin of the message. Note that there isn't a 1:1
relationship between nlmsg_pid and the PID of the process if the
message originated from a netlink socket. See the ADDRESS
FORMATS section for further information.

Both nlmsg_seq and nlmsg_pid are opaque to netlink core."

and:

" nl_pid is the unicast address of netlink socket. It's always 0
if the destination is in the kernel. For a user-space process,
nl_pid is usually the PID of the process owning the destination
socket. However, nl_pid identifies a netlink socket, not a
process. If a process owns several netlink sockets, then nl_pid
can be equal to the process ID only for at most one socket.
There are two ways to assign nl_pid to a netlink socket. If the
application sets nl_pid before calling bind(2), then it is up to
the application to make sure that nl_pid is unique. If the
application sets it to 0, the kernel takes care of assigning it.
The kernel assigns the process ID to the first netlink socket the
process opens and assigns a unique nl_pid to every netlink socket
that the process subsequently creates."

The 1st one needs to be stashed or generated (if 0) and then set on nlmsg_pid
for each response so that the application receiving it can distinguish it
if necessary. Golang runtime actually calls getsockname() and verifies that
the nlmsg_pid in the replies matches the nl_pid on the source socket.

The patch modifies the relevant code that builds netlink responses
to take the nl_pid stashed in the socket's control block and set it as the
value of nlmsg_pid. It also re-implements netlink_sockaddr()
so that it returns information including the source PID.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>
---
bsd/sys/compat/linux/linux_netlink.cc | 41 +++++++++++++++++++--------
bsd/sys/net/if_llatbl.cc | 8 +++---
bsd/sys/net/if_llatbl.h | 4 +--
3 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
index fcdab06b..82205d2b 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -436,10 +436,27 @@ netlink_shutdown(struct socket *so)
return (raw_usrreqs.pru_shutdown(so));
}

+static pid_t
+get_socket_pid(struct socket *so)
+{
+ struct rawcb *rp = sotorawcb(so);
+ struct netlinkcb *ncb = (netlinkcb *)rp;
+ return ncb->nl_pid;
+}
+
static int
netlink_sockaddr(struct socket *so, struct bsd_sockaddr **nam)
{
- return (raw_usrreqs.pru_sockaddr(so, nam));
+ struct bsd_sockaddr_nl *sin; /* returned to the caller through *nam */
+ if ((sin = (bsd_sockaddr_nl*)malloc(sizeof *sin)) == NULL)
+ return ENOBUFS; /* same failure code netlink_attach() returns when malloc() fails */
+ bzero(sin, sizeof *sin);
+ sin->nl_family = AF_NETLINK;
+ sin->nl_len = sizeof(*sin);
+ sin->nl_pid = get_socket_pid(so); /* pid stored in the socket's netlinkcb */
+
+ *nam = (bsd_sockaddr*)sin;
+ return 0;
}

static struct pr_usrreqs netlink_usrreqs = initialize_with([] (pr_usrreqs& x) {
@@ -474,7 +491,7 @@ netlink_senderr(struct socket *so, struct nlmsghdr *nlm, int error)
}

if ((hdr = (struct nlmsghdr *)nlmsg_put(m,
- nlm ? nlm->nlmsg_pid : 0,
+ get_socket_pid(so),
nlm ? nlm->nlmsg_seq : 0,
NLMSG_ERROR, sizeof(*err),
nlm ? nlm->nlmsg_flags : 0)) == NULL) {
@@ -513,7 +530,7 @@ netlink_process_getlink_msg(struct socket *so, struct nlmsghdr *nlm)
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
IF_ADDR_RLOCK(ifp);

- nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWLINK, sizeof(*ifm), nlm->nlmsg_flags);
+ nlh = nlmsg_begin(m, get_socket_pid(so), nlm->nlmsg_seq, LINUX_RTM_NEWLINK, sizeof(*ifm), nlm->nlmsg_flags);
if (!nlh) {
error = ENOBUFS;
goto done;
@@ -547,7 +564,7 @@ netlink_process_getlink_msg(struct socket *so, struct nlmsghdr *nlm)
IF_ADDR_RUNLOCK(ifp);
nlmsg_end(m, nlh);
}
- nlh = nlmsg_put(m, nlm->nlmsg_pid, nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags);
+ nlh = nlmsg_put(m, get_socket_pid(so), nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags);

done:
if (ifp != NULL)
@@ -605,7 +622,7 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm)
if (!ifa->ifa_addr)
continue;

- nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWADDR, sizeof(*ifm), nlm->nlmsg_flags);
+ nlh = nlmsg_begin(m, get_socket_pid(so), nlm->nlmsg_seq, LINUX_RTM_NEWADDR, sizeof(*ifm), nlm->nlmsg_flags);
if (!nlh) {
error = ENOBUFS;
goto done;
@@ -663,7 +680,7 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm)

IF_ADDR_RUNLOCK(ifp);
}
- nlh = nlmsg_put(m, nlm->nlmsg_pid, nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags);
+ nlh = nlmsg_put(m, get_socket_pid(so), nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags);
done:
if (ifp != NULL)
IF_ADDR_RUNLOCK(ifp);
@@ -728,7 +745,7 @@ struct netlink_getneigh_lle_cbdata {
};

static int
-netlink_getneigh_lle_cb(struct lltable *llt, struct llentry *lle, void *data)
+netlink_getneigh_lle_cb(struct socket *so, struct lltable *llt, struct llentry *lle, void *data)
{
struct netlink_getneigh_lle_cbdata *cbdata = (struct netlink_getneigh_lle_cbdata *) data;
int ndm_family = netlink_bsd_to_linux_family(llt->llt_af);
@@ -743,7 +760,7 @@ netlink_getneigh_lle_cb(struct lltable *llt, struct llentry *lle, void *data)
struct nlmsghdr *nlm = cbdata->nlm;
struct mbuf *m = cbdata->m;
struct ndmsg *ndm;
- struct nlmsghdr *nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWNEIGH, sizeof(*ndm), nlm->nlmsg_flags);
+ struct nlmsghdr *nlh = nlmsg_begin(m, get_socket_pid(so), nlm->nlmsg_seq, LINUX_RTM_NEWNEIGH, sizeof(*ndm), nlm->nlmsg_flags);

if (!nlh) {
return ENOBUFS;
@@ -788,7 +805,7 @@ netlink_getneigh_lle_cb(struct lltable *llt, struct llentry *lle, void *data)


static int
-netlink_getneigh_lltable_cb(struct lltable *llt, void *cbdata)
+netlink_getneigh_lltable_cb(struct socket *so, struct lltable *llt, void *cbdata)
{
struct netlink_getneigh_lle_cbdata *data = (struct netlink_getneigh_lle_cbdata *) cbdata;
int error = 0;
@@ -799,7 +816,7 @@ netlink_getneigh_lltable_cb(struct lltable *llt, void *cbdata)
return 0;

IF_AFDATA_RLOCK(llt->llt_ifp);
- error = lltable_foreach_lle(llt, netlink_getneigh_lle_cb, data);
+ error = lltable_foreach_lle(so, llt, netlink_getneigh_lle_cb, data);
IF_AFDATA_RUNLOCK(llt->llt_ifp);

return error;
@@ -829,10 +846,10 @@ netlink_process_getneigh_msg(struct socket *so, struct nlmsghdr *nlm)
cbdata.family = ndm->ndm_family;
cbdata.state = ndm->ndm_state;

- error = lltable_foreach(netlink_getneigh_lltable_cb, &cbdata);
+ error = lltable_foreach(so, netlink_getneigh_lltable_cb, &cbdata);

if (!error) {
- nlh = nlmsg_put(m, nlm->nlmsg_pid, nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags);
+ nlh = nlmsg_put(m, get_socket_pid(so), nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags);
netlink_dispatch(so, m);
} else {
m_free(m);
diff --git a/bsd/sys/net/if_llatbl.cc b/bsd/sys/net/if_llatbl.cc
index b1b5112a..8bdc0983 100644
--- a/bsd/sys/net/if_llatbl.cc
+++ b/bsd/sys/net/if_llatbl.cc
@@ -501,14 +501,14 @@ DB_SHOW_ALL_COMMAND(lltables, db_show_all_lltables)
/*
* Iterate over all lltables
*/
-int lltable_foreach(int (*func)(struct lltable *llt, void *cbdata), void *cbdata)
+int lltable_foreach(struct socket *so, int (*func)(struct socket *so, struct lltable *llt, void *cbdata), void *cbdata)
{
struct lltable *llt;
int error = 0;

LLTABLE_RLOCK();
SLIST_FOREACH(llt, &V_lltables, llt_link) {
- if ((error = func(llt, cbdata)) != 0)
+ if ((error = func(so, llt, cbdata)) != 0)
break;
}
LLTABLE_RUNLOCK();
@@ -519,7 +519,7 @@ int lltable_foreach(int (*func)(struct lltable *llt, void *cbdata), void *cbdata
/*
* Iterate over all llentries in the lltable
*/
-int lltable_foreach_lle(struct lltable *llt, int (*func)(struct lltable *llt, struct llentry *lle, void *cbdata), void *cbdata)
+int lltable_foreach_lle(struct socket *so, struct lltable *llt, int (*func)(struct socket *so, struct lltable *llt, struct llentry *lle, void *cbdata), void *cbdata)
{
struct llentry *lle;
int i;
@@ -530,7 +530,7 @@ int lltable_foreach_lle(struct lltable *llt, int (*func)(struct lltable *llt, st
/* skip deleted entries */
if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
continue;
- if ((error = func(llt, lle, cbdata)) != 0)
+ if ((error = func(so, llt, lle, cbdata)) != 0)
break;
}
}
diff --git a/bsd/sys/net/if_llatbl.h b/bsd/sys/net/if_llatbl.h
index 6e0985ec..31a7def2 100644
--- a/bsd/sys/net/if_llatbl.h
+++ b/bsd/sys/net/if_llatbl.h
@@ -201,12 +201,12 @@ struct llentry *llentry_alloc(struct ifnet *, struct lltable *,
/*
* Iterate over all lltables
*/
-int lltable_foreach(int (*func)(struct lltable *llt, void *cbdata), void *cbdata);
+int lltable_foreach(struct socket *so, int (*func)(struct socket *so, struct lltable *llt, void *cbdata), void *cbdata);

/*
* Iterate over all llentries in the lltable
*/
-int lltable_foreach_lle(struct lltable *llt, int (*func)(struct lltable *llt, struct llentry *lle, void *cbdata), void *cbdata);
+int lltable_foreach_lle(struct socket *so, struct lltable *llt, int (*func)(struct socket *so, struct lltable *llt, struct llentry *lle, void *cbdata), void *cbdata);

__END_DECLS

--
2.35.1

Waldemar Kozaczuk

unread,
Jun 3, 2022, 9:28:55 PM6/3/22
to osv...@googlegroups.com, Waldemar Kozaczuk
Fix netlink_process_msg() to propagate potential error
from netlink_senderr(). Normally netlink_senderr() should return
0 indicating that the error response was built successfully.
This patch tweaks the logic to make sure the error response
in such case is sent back as a NLMSG_ERROR reply accordingly
instead of making sendmsg() return error.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>
---
bsd/sys/compat/linux/linux_netlink.cc | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
index 82205d2b..180d81b5 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -830,7 +830,7 @@ netlink_process_getneigh_msg(struct socket *so, struct nlmsghdr *nlm)
struct netlink_getneigh_lle_cbdata cbdata;
int error;

- if (nlm->nlmsg_len < sizeof (struct ndmsg)) {
+ if (nlm->nlmsg_len < NLMSG_LENGTH(sizeof (struct ndmsg))) {
return EINVAL;
}

@@ -892,7 +892,7 @@ netlink_process_msg(struct mbuf *m, struct socket *so)

flush:
if (error) {
- netlink_senderr(so, nlm, error);
+ error = netlink_senderr(so, nlm, error);
}
if (m) {
m_freem(m);
--
2.35.1

Waldemar Kozaczuk

unread,
Jun 3, 2022, 9:28:57 PM6/3/22
to osv...@googlegroups.com, Waldemar Kozaczuk
Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>
---
bsd/sys/compat/linux/linux_netlink.cc | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
index 180d81b5..be9ea1b8 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -118,6 +118,7 @@ static int get_sockaddr_mask_prefix_len(struct bsd_sockaddr *sa)
}


+static
void *nl_m_put(struct mbuf *m0, int len)
{
struct mbuf *m, *n;
@@ -151,6 +152,7 @@ void *nl_m_put(struct mbuf *m0, int len)
return data;
}

+static
struct nlmsghdr * nlmsg_put(struct mbuf *m, uint32_t pid, uint32_t seq, int type, int len, int flags)
{
struct nlmsghdr *nlh;
@@ -170,16 +172,19 @@ struct nlmsghdr * nlmsg_put(struct mbuf *m, uint32_t pid, uint32_t seq, int type
return nlh;
}

+static
struct nlmsghdr * nlmsg_begin(struct mbuf *m, uint32_t pid, uint32_t seq, int type, int len, int flags)
{
return nlmsg_put(m, pid, seq, type, len, flags);
}

+static
void nlmsg_end(struct mbuf *m, struct nlmsghdr *nlh)
{
nlh->nlmsg_len = m->M_dat.MH.MH_pkthdr.len - ((uintptr_t)nlh - (uintptr_t)m->m_hdr.mh_data);
}

+static
int nla_put(struct mbuf *m, int attrtype, int len, const void *src)
{
struct nlattr *nla;
@@ -198,16 +203,18 @@ int nla_put(struct mbuf *m, int attrtype, int len, const void *src)
}

template<class T>
-int nla_put_type(struct mbuf *m, int attrtype, T val)
+static int nla_put_type(struct mbuf *m, int attrtype, T val)
{
return nla_put(m, attrtype, sizeof(val), &val);
}

+static
int nla_put_string(struct mbuf *m, int attrtype, const char *str)
{
return nla_put(m, attrtype, strlen(str) + 1, str);
}

+static
int nla_put_sockaddr(struct mbuf *m, int attrtype, struct bsd_sockaddr *sa)
{
void *data;
--
2.35.1

Waldemar Kozaczuk

unread,
Jun 3, 2022, 9:28:58 PM6/3/22
to osv...@googlegroups.com, Waldemar Kozaczuk
The netlink specification requires that the error field contains a negative
value of errno.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>
---
bsd/sys/compat/linux/linux_netlink.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
index be9ea1b8..ec7e9341 100644
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -506,7 +506,7 @@ netlink_senderr(struct socket *so, struct nlmsghdr *nlm, int error)
return ENOBUFS;
}
err = (struct nlmsgerr *) nlmsg_data(hdr);
- err->error = error;
+ err->error = -error; //Per netlink spec - "Negative errno or 0 for acknowledgements"
if (nlm) {
err->msg = *nlm;
} else {
--
2.35.1

Waldemar Kozaczuk

unread,
Jun 3, 2022, 9:29:00 PM6/3/22
to osv...@googlegroups.com, Waldemar Kozaczuk
This changes bsd/net.cc to enable netlink by registering the netlink
domain and calling netlink_init().

It also adds a unit test to verify the netlink implementation.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>
---
bsd/net.cc | 5 +
modules/tests/Makefile | 3 +-
tests/tst-netlink.c | 441 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 448 insertions(+), 1 deletion(-)
create mode 100644 tests/tst-netlink.c

diff --git a/bsd/net.cc b/bsd/net.cc
index 3e427575..f548e091 100644
--- a/bsd/net.cc
+++ b/bsd/net.cc
@@ -23,6 +23,7 @@
#include <bsd/sys/netinet/cc.h>
#include <bsd/sys/net/ethernet.h>
#include <bsd/sys/net/route.h>
+#include <bsd/sys/compat/linux/linux_netlink.h>

/* Generation of ip ids */
void ip_initid(void);
@@ -32,6 +33,8 @@ extern "C" {
extern struct domain inetdomain;
/* AF_ROUTE */
extern struct domain routedomain;
+ /* AF_NETLINK */
+ extern struct domain netlinkdomain;
}

void net_init(void)
@@ -53,9 +56,11 @@ void net_init(void)
domaininit(NULL);
OSV_DOMAIN_SET(inet);
OSV_DOMAIN_SET(route);
+ OSV_DOMAIN_SET(netlink);
rts_init();
route_init();
vnet_route_init();
+ netlink_init();
ipport_tick_init(NULL);
arp_init();
domainfinalize(NULL);
diff --git a/modules/tests/Makefile b/modules/tests/Makefile
index e462ebc8..f79da870 100644
--- a/modules/tests/Makefile
+++ b/modules/tests/Makefile
@@ -133,7 +133,8 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so tst-bsd-evh.so \
tst-getopt.so tst-getopt-pie.so tst-non-pie.so tst-semaphore.so \
tst-elf-init.so tst-realloc.so tst-setjmp.so \
libtls.so libtls_gold.so tst-tls.so tst-tls-gold.so tst-tls-pie.so \
- tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so
+ tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so \
+ tst-netlink.so
# libstatic-thread-variable.so tst-static-thread-variable.so \

#TODO For now let us disable these tests for aarch64 until
diff --git a/tests/tst-netlink.c b/tests/tst-netlink.c
new file mode 100644
index 00000000..aebc9dd5
--- /dev/null
+++ b/tests/tst-netlink.c
@@ -0,0 +1,441 @@
+/* Unit test that verifies limited netlink support in OSv
+ *
+ * Copyright (C) 2022 Waldemar Kozaczuk
+ *
+ * This work is open source software, licensed under the terms of the
+ * BSD license as described in the LICENSE file in the top-level directory.
+ */
+
+// This test should run on Linux:
+// gcc tests/tst-netlink.c -o tst-netlink
+// ./tst-netlink
+
+#include <stdio.h> //printf, perror
+#include <string.h> //memset, strlen
+#include <stdlib.h> //exit
+#include <unistd.h> //close
+#include <sys/socket.h> //msghdr
+#include <arpa/inet.h> //inet_ntop
+#include <linux/netlink.h> //sockaddr_nl
+#include <linux/rtnetlink.h> //rtgenmsg,ifinfomsg
+#include <net/if.h>
+#include <assert.h>
+#include <errno.h>
+
+#define BUFSIZE 8192
+
+void die(const char *s)
+{
+ perror(s);
+ exit(1);
+}
+
+int called_response_handler = 0;
+
+int test_netlink(struct nlmsghdr* req, pid_t pid, void (*handle_response)(struct nlmsghdr *))
+{
+ struct sockaddr_nl src_addr, dst_addr, src_addr2;
+ int s, len, end = 0;
+ struct msghdr msg;
+ struct iovec iov[1];
+ char buf[BUFSIZE];
+
+ //create a netlink socket
+ if ((s=socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) < 0)
+ {
+ die("socket FAILED");
+ }
+
+ //bind socket
+ memset(&src_addr, 0, sizeof(src_addr));
+ src_addr.nl_family = AF_NETLINK;
+ src_addr.nl_pid = pid; // if 0 kernel will assign unique id
+ src_addr.nl_groups = 0; /* not in mcast groups */
+ if (bind(s, (struct sockaddr*) &src_addr, sizeof(src_addr)))
+ {
+ die("bind FAILED");
+ }
+
+ //get sock name to check pid
+ memset(&src_addr2, 0, sizeof(src_addr2));
+ socklen_t addr_len = sizeof(src_addr2);
+ if (getsockname(s, (struct sockaddr*)&src_addr2, &addr_len)) {
+ die("getsockname FAILED");
+ }
+ if (src_addr.nl_pid != 0) {
+ assert(src_addr.nl_pid == src_addr2.nl_pid);
+ }
+
+ //build destination - kernel netlink address
+ memset(&dst_addr, 0, sizeof(dst_addr));
+ dst_addr.nl_family = AF_NETLINK;
+ dst_addr.nl_pid = 0; // should be 0 if destination is kernel
+ //dst_addr.nl_pid = 1; //TODO: check that non-0 errors with "sendmsg: Operation not permitted"
+ dst_addr.nl_groups = 0;
+
+ //build netlink message
+ iov[0].iov_base = req;
+ iov[0].iov_len = req->nlmsg_len;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 1;
+ msg.msg_name = &dst_addr;
+ msg.msg_namelen = sizeof(dst_addr);
+
+ //send the message
+ if (sendmsg(s, &msg, 0) < 0)
+ {
+ die("sendmsg FAILED");
+ }
+
+ called_response_handler = 0;
+ //parse reply
+ while (!end)
+ {
+ memset(&msg, 0, sizeof(msg)); //These and 2 lines below are needed to reset msg - otherwise weird page faults happen
+ msg.msg_iov = iov; //Check if we can improve things downstream with some asserts or even error handling
+ msg.msg_iovlen = 1;
+
+ memset(buf, 0, BUFSIZE);
+ msg.msg_iov[0].iov_base = buf;
+ msg.msg_iov[0].iov_len = BUFSIZE;
+ printf("---> Receiving messages!\n");
+ if ((len=recvmsg(s, &msg, 0)) < 0)
+ {
+ die("recvmsg FAILED");
+ }
+
+ for (struct nlmsghdr *rsp = (struct nlmsghdr *)buf;
+ NLMSG_OK(rsp, len); rsp = NLMSG_NEXT(rsp, len))
+ {
+ printf("... received response: %d!\n", rsp->nlmsg_type);
+ //Verify pid of the response matches pid of the socket
+ assert(rsp->nlmsg_pid == src_addr2.nl_pid);
+ //Verify sequence of the response matches sequence of the request
+ assert(rsp->nlmsg_seq == req->nlmsg_seq);
+ switch (rsp->nlmsg_type)
+ {
+ case NLMSG_DONE:
+ end++;
+ break;
+ case NLMSG_ERROR:
+ called_response_handler = 1;
+ handle_response(rsp);
+ end++;
+ break;
+ default:
+ called_response_handler = 1;
+ handle_response(rsp);
+ break;
+ }
+ }
+ }
+
+ if (close(s)) {
+ die("close FAILED");
+ };
+ return 0;
+}
+
+/////////////////////////////
+//Test RTM_GETLINK requests
+/////////////////////////////
+void test_getlink_response(struct nlmsghdr *rsp)
+{
+ struct ifinfomsg *iface;
+ struct rtattr *attr;
+ int len;
+
+ assert(rsp->nlmsg_type == RTM_NEWLINK);
+
+ iface = NLMSG_DATA(rsp);
+ len = IFLA_PAYLOAD(rsp);
+
+ printf("Interface %d: up=%d\n", iface->ifi_index, (iface->ifi_flags & IFF_UP) != 0);
+
+ assert(iface->ifi_family == AF_UNSPEC);
+ if (iface->ifi_flags & IFF_LOOPBACK) {
+ assert(iface->ifi_index == 1);
+ assert(iface->ifi_flags & IFF_UP);
+ } else {
+ assert(iface->ifi_index > 1);
+ }
+ //TODO: Verify ifi_type
+
+ /* loop over all attributes for the NEWLINK message */
+ for (attr = IFLA_RTA(iface); RTA_OK(attr, len); attr = RTA_NEXT(attr, len))
+ {
+ switch (attr->rta_type)
+ {
+ case IFLA_IFNAME:
+ printf("\tname=%s\n", (char *)RTA_DATA(attr));
+ break;
+ case IFLA_ADDRESS:
+ {
+ unsigned char* ptr = (unsigned char*)RTA_DATA(attr);
+ printf("\taddress=%02x:%02x:%02x:%02x:%02x:%02x\n",
+ ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5]);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
+struct nl_getlink_req {
+ struct nlmsghdr hdr;
+ struct rtgenmsg gen;
+};
+
+void test_getlink(pid_t pid)
+{
+ //build netlink request
+ struct nl_getlink_req req;
+ memset(&req, 0, sizeof(req));
+ req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
+ req.hdr.nlmsg_type = RTM_GETLINK;
+ req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ req.hdr.nlmsg_seq = 321;
+ req.hdr.nlmsg_pid = getpid();
+ req.gen.rtgen_family = AF_INET;
+
+ test_netlink(&req.hdr, pid, test_getlink_response);
+ assert(called_response_handler);
+}
+
+/////////////////////////////
+//Test RTM_GETADDR requests
+/////////////////////////////
+void print_ip_address(const char *type, int ip)
+{
+ unsigned char bytes[4];
+ bytes[0] = ip & 0xFF;
+ bytes[1] = (ip >> 8) & 0xFF;
+ bytes[2] = (ip >> 16) & 0xFF;
+ bytes[3] = (ip >> 24) & 0xFF;
+ printf("\t%s ip=%d.%d.%d.%d\n", type, bytes[0], bytes[1], bytes[2], bytes[3]);
+}
+
+void test_getaddr_response(struct nlmsghdr *rsp)
+{
+ struct ifaddrmsg *addr;
+ struct rtattr *attr;
+ int len;
+
+ assert(rsp->nlmsg_type == RTM_NEWADDR);
+
+ addr = NLMSG_DATA(rsp);
+ len = IFA_PAYLOAD(rsp);
+
+ printf("Address %d:\n", addr->ifa_index);
+ assert(addr->ifa_family == AF_INET || addr->ifa_family == AF_INET6);
+
+ /* loop over all attributes for the NEWADDR message */
+ for (attr = IFA_RTA(addr); RTA_OK(attr, len); attr = RTA_NEXT(attr, len))
+ {
+ switch (attr->rta_type)
+ {
+ case IFA_LABEL:
+ printf("\tlabel=%s\n", (char *)RTA_DATA(attr));
+ break;
+ case IFA_ADDRESS:
+ print_ip_address("interface", *(int*)RTA_DATA(attr));
+ break;
+ case IFA_BROADCAST:
+ print_ip_address("broadcast", *(int*)RTA_DATA(attr));
+ break;
+ case IFA_LOCAL:
+ print_ip_address("local", *(int*)RTA_DATA(attr));
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+struct nl_getaddr_req {
+ struct nlmsghdr hdr;
+ struct rtgenmsg gen;
+};
+
+void test_getaddr(pid_t pid)
+{
+ //build netlink request
+ struct nl_getaddr_req req;
+ memset(&req, 0, sizeof(req));
+ req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtgenmsg));
+ req.hdr.nlmsg_type = RTM_GETADDR;
+ req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ req.hdr.nlmsg_seq = 654;
+ req.hdr.nlmsg_pid = getpid();
+ req.gen.rtgen_family = AF_INET;
+
+ test_netlink(&req.hdr, pid, test_getaddr_response);
+ assert(called_response_handler);
+}
+
+/////////////////////////////
+//Test RTM_GETNEIGH requests
+/////////////////////////////
+#define ND_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg))))
+
+/*
+ * Validate and print one RTM_NEWNEIGH response: the ndmsg header followed by
+ * its NDA_DST (layer 3 address) and NDA_LLADDR (layer 2 address) attributes.
+ */
+void test_getneigh_response(struct nlmsghdr *rsp)
+{
+    struct ndmsg *nd;
+    struct rtattr *attr;
+    int len;
+    // Large enough for either family accepted by the assert below
+    char ipstring[INET6_ADDRSTRLEN];
+
+    assert(rsp->nlmsg_type == RTM_NEWNEIGH);
+
+    nd = NLMSG_DATA(rsp);
+    // Number of attribute bytes that follow the ndmsg header. RTM_PAYLOAD()
+    // is defined in terms of struct rtmsg, so size the header explicitly.
+    len = NLMSG_PAYLOAD(rsp, sizeof(struct ndmsg));
+
+    printf("Neighbour Table Entry %d:\n", nd->ndm_ifindex);
+    assert(nd->ndm_family == AF_INET || nd->ndm_family == AF_INET6);
+
+    printf("\tndm_state=");
+    switch (nd->ndm_state) {
+        case NUD_INCOMPLETE: printf("NUD_INCOMPLETE\n"); break;
+        case NUD_REACHABLE: printf("NUD_REACHABLE\n"); break;
+        case NUD_STALE: printf("NUD_STALE\n"); break;
+        case NUD_DELAY: printf("NUD_DELAY\n"); break;
+        case NUD_PROBE: printf("NUD_PROBE\n"); break;
+        case NUD_FAILED: printf("NUD_FAILED\n"); break;
+        case NUD_NOARP: printf("NUD_NOARP\n"); break;
+        case NUD_PERMANENT: printf("NUD_PERMANENT\n"); break;
+        default: printf("NUD_???\n"); break;
+    }
+
+    /* loop over all attributes for the NEWNEIGH message */
+    for (attr = ND_RTA(nd); RTA_OK(attr, len); attr = RTA_NEXT(attr, len))
+    {
+        switch (attr->rta_type)
+        {
+            case NDA_DST:
+                // Decode with the entry's own family instead of assuming IPv4,
+                // and only print the buffer when the conversion succeeded.
+                if (inet_ntop(nd->ndm_family, RTA_DATA(attr), ipstring, sizeof(ipstring))) {
+                    printf("\tIP address=%s\n", ipstring);
+                } else {
+                    perror("inet_ntop FAILED");
+                }
+                break;
+            case NDA_LLADDR:
+            {
+                unsigned char* ptr = (unsigned char*)RTA_DATA(attr);
+                printf("\tL2 address=%02x:%02x:%02x:%02x:%02x:%02x\n",
+                    ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], ptr[5]);
+                break;
+            }
+            default:
+                break;
+        }
+    }
+}
+
+struct nl_getneigh_req {
+ struct nlmsghdr hdr;
+ struct ndmsg r;
+};
+
+void test_getneigh(pid_t pid)
+{
+ //build netlink request
+ struct nl_getneigh_req req;
+ memset(&req, 0, sizeof(req));
+ req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
+ req.hdr.nlmsg_type = RTM_GETNEIGH;
+ req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT | NLM_F_DUMP;
+ req.hdr.nlmsg_seq = 987;
+ req.hdr.nlmsg_pid = getpid();
+ req.r.ndm_family = AF_INET;
+ req.r.ndm_state = NUD_REACHABLE;
+
+ test_netlink(&req.hdr, pid, test_getneigh_response);
+ assert(called_response_handler);
+}
+
+//////////////////////////////////////////
+//Test unsupported netlink type operation
+//////////////////////////////////////////
+void test_invalid_type_response(struct nlmsghdr *rsp)
+{
+ struct nlmsgerr *err;
+ assert(rsp->nlmsg_type == NLMSG_ERROR);
+ err = (struct nlmsgerr *)NLMSG_DATA(rsp);
+ assert(-(err->error) == EOPNOTSUPP);
+}
+
+/*
+ * Send a request with an unknown nlmsg_type (9999) on a socket bound with
+ * pid 0; test_invalid_type_response() checks that the reply is an
+ * NLMSG_ERROR carrying EOPNOTSUPP.
+ */
+void test_invalid_type_request(void)
+{
+    //build netlink request
+    struct nl_getneigh_req req;
+    memset(&req, 0, sizeof(req));
+    // NLMSG_LENGTH() already adds the netlink header, so pass only the payload
+    // size. test_netlink() uses nlmsg_len as the iovec length, and
+    // NLMSG_LENGTH(sizeof(req)) would make sendmsg() read past the end of req.
+    req.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
+    req.hdr.nlmsg_type = 9999;
+    req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT | NLM_F_DUMP;
+    req.hdr.nlmsg_seq = 1;
+    req.hdr.nlmsg_pid = getpid();
+    req.r.ndm_family = AF_INET;
+    req.r.ndm_state = NUD_REACHABLE;
+
+    test_netlink(&req.hdr, 0, test_invalid_type_response);
+    assert(called_response_handler);
+}
+
+#ifdef __OSV__
+//////////////////////////////////////////
+//Test handling of corrupt netlink request
+//////////////////////////////////////////
+void test_error_response(struct nlmsghdr *rsp)
+{
+ struct nlmsgerr *err;
+ assert(rsp->nlmsg_type == NLMSG_ERROR);
+ err = (struct nlmsgerr *)NLMSG_DATA(rsp);
+ assert(-(err->error) == EINVAL);
+}
+
+void test_corrupt_request()
+{
+ //build netlink request
+ struct nl_getneigh_req req;
+ memset(&req, 0, sizeof(req));
+ req.hdr.nlmsg_len = NLMSG_LENGTH(0); //This forces EINVAL error
+ req.hdr.nlmsg_type = RTM_GETNEIGH;
+ req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT | NLM_F_DUMP;
+ req.hdr.nlmsg_seq = 1;
+ req.hdr.nlmsg_pid = getpid();
+ req.r.ndm_family = AF_INET;
+ req.r.ndm_state = NUD_REACHABLE;
+
+ test_netlink(&req.hdr, 0, test_error_response);
+ assert(called_response_handler);
+}
+#endif
+
+int main()
+{
+ printf("--------- Interfaces (layer 2) ------\n");
+ test_getlink(0);
+ test_getlink(getpid());
+ test_getlink(123);
+ printf("--------- IP Addresses (layer 3) ----\n");
+ test_getaddr(0);
+ test_getaddr(getpid());
+ test_getaddr(456);
+ printf("--------- Neighbor Table Entries ----\n");
+ test_getneigh(0);
+ test_getneigh(getpid());
+ test_getneigh(789);
+ printf("--------- Testing invalid type request ---\n");
+ test_invalid_type_request();
+#ifdef __OSV__
+ printf("--------- Testing corrupt request ---\n");
+ test_corrupt_request();
+#endif
+}
--
2.35.1

Commit Bot

unread,
Jun 13, 2022, 11:03:37 AM6/13/22
to osv...@googlegroups.com, Waldemar Kozaczuk
From: Waldemar Kozaczuk <jwkoz...@gmail.com>
Committer: Waldemar Kozaczuk <jwkoz...@gmail.com>
Branch: master

netlink: minimal Linux rtnetlink support
src_addr.nl_pid = pid; // if 0 kernel will assign unique id
bind(s, (struct sockaddr*) &src_addr, sizeof(src_addr))

// step 3
dst_addr.nl_family = AF_NETLINK;
dst_addr.nl_pid = 0; // should be 0 if destination is kernel

iov[0].iov_base = req;
iov[0].iov_len = req->nlmsg_len;

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>

---
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -593,6 +593,7 @@ bsd += bsd/porting/bus_dma.o
bsd += bsd/sys/netinet/if_ether.o
bsd += bsd/sys/compat/linux/linux_socket.o
bsd += bsd/sys/compat/linux/linux_ioctl.o
+bsd += bsd/sys/compat/linux/linux_netlink.o
bsd += bsd/sys/net/if_ethersubr.o
bsd += bsd/sys/net/if_llatbl.o
bsd += bsd/sys/net/radix.o
diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
+ }
+ return 0;
+}
+
+static int get_sockaddr_mask_prefix_len(struct bsd_sockaddr *sa)
+{
+ void *data;
+ int data_len;
+
+ if (!sa)
+ return 0;
+
+ switch (sa->sa_family) {
+#ifdef INET
+ case AF_INET:
+ data = &((struct bsd_sockaddr_in *)sa)->sin_addr;
+ data_len = sizeof(((struct bsd_sockaddr_in *)sa)->sin_addr);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ data = ((struct bsd_sockaddr_in6 *)sa)->sin6_addr.s6_addr;
+ data_len = sizeof(((struct bsd_sockaddr_in6 *)sa)->sin6_addr);
+ break;
+#endif
+ default:
+ return 0;
+ }
+
+struct nlmsghdr * nlmsg_begin(struct mbuf *m, uint32_t pid, uint32_t seq, int type, int len, int flags)
+{
+ return nlmsg_put(m, pid, seq, type, len, flags);
+}
+
+void nlmsg_end(struct mbuf *m, struct nlmsghdr *nlh)
+{
+ nlh->nlmsg_len = m->M_dat.MH.MH_pkthdr.len - ((uintptr_t)nlh - (uintptr_t)m->m_hdr.mh_data);
+}
+
+int nla_put(struct mbuf *m, int attrtype, int len, const void *src)
+{
+ struct nlattr *nla;
+ int size = nla_attr_size(len);
+ int align_size = NLA_ALIGN(size);
+ nla = (struct nlattr *)nl_m_put(m, align_size);
+ if (!nla)
+ return ENOMEM;
+ nla->nla_len = size;
+ nla->nla_type = attrtype;
+ void *dest = nla_data(nla);
+ memcpy(dest, src, len);
+ if (size != align_size)
+ memset(dest + size, 0, (align_size - size));
+ return 0;
+}
+
+template<class T>
+int nla_put_type(struct mbuf *m, int attrtype, T val)
+{
+ return nla_put(m, attrtype, sizeof(val), &val);
+}
+
+int nla_put_string(struct mbuf *m, int attrtype, const char *str)
+{
+ return nla_put(m, attrtype, strlen(str) + 1, str);
+}
+
+int nla_put_sockaddr(struct mbuf *m, int attrtype, struct bsd_sockaddr *sa)
+{
+ void *data;
+ int data_len;
+
+ if (!sa)
+ return 0;
+
+ switch (sa->sa_family) {
+#ifdef INET
+ case AF_INET:
+ data = &((struct bsd_sockaddr_in *)sa)->sin_addr;
+ data_len = sizeof(((struct bsd_sockaddr_in *)sa)->sin_addr);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ data = ((struct bsd_sockaddr_in6 *)sa)->sin6_addr.s6_addr;
+ data_len = sizeof(((struct bsd_sockaddr_in6 *)sa)->sin6_addr);
+ break;
+#endif
+ case AF_LINK:
+ data = ((struct bsd_sockaddr_dl *)sa)->sdl_data + ((struct bsd_sockaddr_dl *)sa)->sdl_nlen;
+ data_len = ((struct bsd_sockaddr_dl *)sa)->sdl_alen;
+ break;
+ default:
+ data = sa->sa_data;
+ data_len = sa->sa_len;
+ break;
+ }
+
+netlink_attach(struct socket *so, int proto, struct thread *td)
+{
+ return 0;
+}
+
+static int
+netlink_bind(struct socket *so, struct bsd_sockaddr *nam, struct thread *td)
+{
+ struct rawcb *rp = sotorawcb(so);
+
+ KASSERT(rp != NULL, ("netlink_bind: rp == NULL"));
+
+ if (nam->sa_family == AF_NETLINK) {
+ if (nam->sa_len != sizeof(struct bsd_sockaddr_nl)) {
+ bsd_log(ERR, "%s(%d) %s Invalid sockaddr_nl length %d expected %d\n",
+ __FILE__, __LINE__, __FUNCTION__, nam->sa_len, sizeof(struct bsd_sockaddr_nl));
+ return EINVAL;
+ }
+ // TODO: stash the nl_pid somewhere
+ return 0;
+ }
+ return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
+}
+
+static int
+netlink_connect(struct socket *so, struct bsd_sockaddr *nam, struct thread *td)
+{
+ return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
+}
+
+/* pru_connect2 is EOPNOTSUPP */
+/* pru_control is EOPNOTSUPP */
+
+static void
+netlink_detach(struct socket *so)
+{
+ struct rawcb *rp = sotorawcb(so);
+
+ struct nlmsgerr *err;
+
+ m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ if (!m) {
+ return ENOBUFS;
+ }
+
+ if ((hdr = (struct nlmsghdr *)nlmsg_put(m,
+ nlm ? nlm->nlmsg_pid : 0,
+ nlm ? nlm->nlmsg_seq : 0,
+ NLMSG_ERROR, sizeof(*err),
+ nlm ? nlm->nlmsg_flags : 0)) == NULL) {
+ m_freem(m);
+ return ENOBUFS;
+ }
+ err = (struct nlmsgerr *) nlmsg_data(hdr);
+ err->error = error;
+ if (nlm) {
+ err->msg = *nlm;
+ } else {
+ memset(&err->msg, 0, sizeof(err->msg));
+ nlm = &err->msg;
+ }
+
+ netlink_dispatch(so, m);
+ return 0;
+}
+
+static int
+netlink_process_getlink_msg(struct socket *so, struct nlmsghdr *nlm)
+{
+ struct ifnet *ifp = NULL;
+ struct bsd_ifaddr *ifa;
+ struct nlmsghdr *nlh;
+ struct ifinfomsg *ifm;
+ struct mbuf *m = NULL;
+ int error = 0;
+
+ m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+ if (!m) {
+ return ENOBUFS;
+ }
+
+ IFNET_RLOCK();
+ TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ IF_ADDR_RLOCK(ifp);
+
+ nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWLINK, sizeof(*ifm), nlm->nlmsg_flags);
+ if (!nlh) {
+ error = ENOBUFS;
+ goto done;
+ }
+
+ ifm = (struct ifinfomsg *) nlmsg_data(nlh);
+ ifm->ifi_family = AF_UNSPEC;
+ ifm->__ifi_pad = 0;
+ ifm->ifi_type = ifp->if_data.ifi_type;
+ ifm->ifi_index = ifp->if_index;
+ ifm->ifi_flags = ifp->if_flags | ifp->if_drv_flags;
+ ifm->ifi_change = 0;
+ if (nla_put_string(m, IFLA_IFNAME, ifp->if_xname) ||
+ nla_put_type<uint32_t>(m, IFLA_LINK, ifp->if_index)) {
+ error = ENOBUFS;
+ goto done;
+ }
+ /* Add hw address info */
+ for (ifa = ifp->if_addr; ifa != NULL; ifa = TAILQ_NEXT(ifa, ifa_link)) {
+ if (ifa->ifa_addr->sa_family == AF_LINK)
+ break;
+ }
+ if (ifa) {
+ if (nla_put_sockaddr(m, IFLA_ADDRESS, ifa->ifa_addr) ||
+ nla_put_sockaddr(m, IFLA_BROADCAST, ifa->ifa_broadaddr)){
+ error = ENOBUFS;
+ goto done;
+ }
+ }
+
+ IF_ADDR_RUNLOCK(ifp);
+ nlmsg_end(m, nlh);
+ }
+ nlh = nlmsg_put(m, nlm->nlmsg_pid, nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags);
+
+ nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_GETADDR, sizeof(*ifm), nlm->nlmsg_flags);
+ if (!nlh) {
+ error = ENOBUFS;
+ goto done;
+ }
+ ifm = (struct ifaddrmsg *) nlmsg_data(nlh);
+ ifm->ifa_index = ifp->if_index;
+ ifm->ifa_family = af;
+ ifm->ifa_prefixlen = get_sockaddr_mask_prefix_len(ifa->ifa_netmask);
+ ifm->ifa_flags = ifp->if_flags | ifp->if_drv_flags;
+ ifm->ifa_scope = 0; // FIXME:
+ if (nla_put_string(m, IFA_LABEL, ifp->if_xname)) {
+ error = ENOBUFS;
+ goto done;
+ }
+#ifdef INET6
+ if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6){
+ // FreeBSD embeds the IPv6 scope ID in the IPv6 address
+ // so need to extract and clear it before returning it.
+ struct bsd_sockaddr_in6 addr, broadaddr;
+ struct bsd_sockaddr *p_addr = ifa->ifa_addr, *p_broadaddr = ifa->ifa_broadaddr;
+ if (p_addr && IN6_IS_ADDR_LINKLOCAL(&((struct bsd_sockaddr_in6 *)p_addr)->sin6_addr)){
+ addr = *(struct bsd_sockaddr_in6 *)p_addr;
+ ifm->ifa_scope = in6_getscope(&addr.sin6_addr);
+ in6_clearscope(&addr.sin6_addr);
+ p_addr = (struct bsd_sockaddr *)&addr;
+ }
+ if (p_broadaddr && IN6_IS_ADDR_LINKLOCAL(&((struct bsd_sockaddr_in6 *)p_broadaddr)->sin6_addr)){
+ broadaddr = *(struct bsd_sockaddr_in6 *)p_broadaddr;
+ in6_clearscope(&broadaddr.sin6_addr);
+ p_broadaddr = (struct bsd_sockaddr *)&broadaddr;
+ }
+ if (nla_put_sockaddr(m, IFA_ADDRESS, p_addr) ||
+ nla_put_sockaddr(m, IFA_BROADCAST, p_broadaddr)){
+ error = ENOBUFS;
+ goto done;
+ }
+ }
+ else
+#endif
+ {
+ if (nla_put_sockaddr(m, IFA_ADDRESS, ifa->ifa_addr) ||
+ nla_put_sockaddr(m, IFA_BROADCAST, ifa->ifa_broadaddr)){
+ error = ENOBUFS;
+ goto done;
+ }
+ }
+ return 0;
+ }
+ }
+#endif
+ if (family == AF_INET) {
+ return NUD_REACHABLE;
+ }
+
+ return 0;
+}
+
+ return 0;
+
+ if (cbdata->state && !(cbdata->state & ndm_state))
+ return 0;
+
+ struct nlmsghdr *nlm = cbdata->nlm;
+ struct mbuf *m = cbdata->m;
+ struct ndmsg *ndm;
+ struct nlmsghdr *nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_GETNEIGH, sizeof(*ndm), nlm->nlmsg_flags);
+
+ if (nla_put(m, NDA_LLADDR, 6, lle->ll_addr.mac16)) {
+ return ENOBUFS;
+ }
+
+ nlmsg_end(m, nlh);
+
+ return 0;
+}
+
+
+static int
+netlink_getneigh_lltable_cb(struct lltable *llt, void *cbdata)
+{
+ struct netlink_getneigh_lle_cbdata *data = (struct netlink_getneigh_lle_cbdata *) cbdata;
+ int error = 0;
+
+ if (data->family && data->family != netlink_bsd_to_linux_family(llt->llt_af))
+ return 0;
+ if (llt->llt_ifp->if_flags & IFF_LOOPBACK)
+ return 0;
+
+ error = lltable_foreach(netlink_getneigh_lltable_cb, &cbdata);
+
+ break;
+ default:
+struct domain netlinkdomain = initialize_with([] (domain& x) {
+ x.dom_family = PF_NETLINK;
+ x.dom_name = "netlink";
+ x.dom_protosw = netlinksw;
+ x.dom_protoswNPROTOSW = &netlinksw[sizeof(netlinksw)/sizeof(netlinksw[0])];
+});
+
+VNET_DOMAIN_SET(netlink);
diff --git a/bsd/sys/compat/linux/linux_netlink.h b/bsd/sys/compat/linux/linux_netlink.h
--- a/bsd/sys/compat/linux/linux_netlink.h
--- a/bsd/sys/compat/linux/linux_socket.cc
+++ b/bsd/sys/compat/linux/linux_socket.cc
@@ -57,6 +57,7 @@

#include <bsd/sys/compat/linux/linux.h>
#include <bsd/sys/compat/linux/linux_socket.h>
+#include <bsd/sys/compat/linux/linux_netlink.h>
#include <osv/stubbing.hh>

#define __NEED_sa_family_t
@@ -257,6 +258,8 @@ linux_to_bsd_domain(int domain)
return (AF_IPX);
case LINUX_AF_APPLETALK:
return (AF_APPLETALK);
+ case LINUX_AF_NETLINK:
+ return (AF_NETLINK);
}
return (-1);
}
@@ -280,6 +283,8 @@ bsd_to_linux_domain(int domain)
return (LINUX_AF_IPX);
case AF_APPLETALK:
return (LINUX_AF_APPLETALK);
+ case AF_NETLINK:
+ return (LINUX_AF_NETLINK);
}
return (-1);
}
diff --git a/bsd/sys/compat/linux/linux_socket.h b/bsd/sys/compat/linux/linux_socket.h
--- a/bsd/sys/compat/linux/linux_socket.h
+++ b/bsd/sys/compat/linux/linux_socket.h
@@ -92,6 +92,7 @@
#define LINUX_AF_IPX 4
#define LINUX_AF_APPLETALK 5
#define LINUX_AF_INET6 10
+#define LINUX_AF_NETLINK 16

/* Supported socket types */

diff --git a/bsd/sys/net/if_llatbl.cc b/bsd/sys/net/if_llatbl.cc
--- a/bsd/sys/net/if_llatbl.cc
+++ b/bsd/sys/net/if_llatbl.cc
+ /* skip deleted entries */
+ if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
+ continue;
+ if ((error = func(llt, lle, cbdata)) != 0)
+ break;
+ }
+ }
+
+ return error;
+}
diff --git a/bsd/sys/net/if_llatbl.h b/bsd/sys/net/if_llatbl.h
--- a/bsd/sys/net/if_llatbl.h
+++ b/bsd/sys/net/if_llatbl.h
@@ -197,6 +197,17 @@ int lltable_sysctl_dumparp(int, struct sysctl_req *);
size_t llentry_free(struct llentry *);
struct llentry *llentry_alloc(struct ifnet *, struct lltable *,
struct bsd_sockaddr_storage *);
+
+/*
+ * Iterate over all lltables
+ */
+int lltable_foreach(int (*func)(struct lltable *llt, void *cbdata), void *cbdata);
+
+/*
+ * Iterate over all llentries in the lltable
+ */
+int lltable_foreach_lle(struct lltable *llt, int (*func)(struct lltable *llt, struct llentry *lle, void *cbdata), void *cbdata);
+
__END_DECLS

/*
@@ -216,4 +227,6 @@ lla_lookup_fast(struct lltable *llt, u_int flags, const struct bsd_sockaddr *l3a
}

int lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *);
+
+
#endif /* _NET_IF_LLATBL_H_ */
diff --git a/bsd/sys/net/netisr.h b/bsd/sys/net/netisr.h

Commit Bot

unread,
Jun 13, 2022, 11:03:38 AM6/13/22
to osv...@googlegroups.com, Waldemar Kozaczuk
From: Waldemar Kozaczuk <jwkoz...@gmail.com>
Committer: Waldemar Kozaczuk <jwkoz...@gmail.com>
Branch: master

netlink: set LINUX_RTM_NEWADDR and LINUX_RTM_NEWNEIGH on responses

This patch fixes a minor bug in handling RTM_GETADDR and RTM_GETNEIGH
requests. It tweaks the relevant code to set the RTM_NEWADDR and RTM_NEWNEIGH
type for the responses respectively.

This is important because, for example, the Golang runtime checks the nlmsg_type of
the netlink response and breaks if it is wrong.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>

---
diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -588,7 +588,7 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm)
if (!ifa->ifa_addr)
continue;

- nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_GETADDR, sizeof(*ifm), nlm->nlmsg_flags);
+ nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWADDR, sizeof(*ifm), nlm->nlmsg_flags);
if (!nlh) {
error = ENOBUFS;
goto done;
@@ -720,7 +720,7 @@ netlink_getneigh_lle_cb(struct lltable *llt, struct llentry *lle, void *data)
struct nlmsghdr *nlm = cbdata->nlm;
struct mbuf *m = cbdata->m;
struct ndmsg *ndm;
- struct nlmsghdr *nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_GETNEIGH, sizeof(*ndm), nlm->nlmsg_flags);
+ struct nlmsghdr *nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWNEIGH, sizeof(*ndm), nlm->nlmsg_flags);

if (!nlh) {
return ENOBUFS;
@@ -753,7 +753,7 @@ netlink_getneigh_lle_cb(struct lltable *llt, struct llentry *lle, void *data)
}
}
#endif
-
+
if (nla_put(m, NDA_LLADDR, 6, lle->ll_addr.mac16)) {
struct domain netlinkdomain = initialize_with([] (domain& x) {
- x.dom_family = PF_NETLINK;
- x.dom_name = "netlink";
- x.dom_protosw = netlinksw;
+ x.dom_family = PF_NETLINK;
+ x.dom_name = "netlink";
+ x.dom_protosw = netlinksw;
x.dom_protoswNPROTOSW = &netlinksw[sizeof(netlinksw)/sizeof(netlinksw[0])];
});

Commit Bot

unread,
Jun 13, 2022, 11:03:40 AM6/13/22
to osv...@googlegroups.com, Waldemar Kozaczuk
From: Waldemar Kozaczuk <jwkoz...@gmail.com>
Committer: Waldemar Kozaczuk <jwkoz...@gmail.com>
Branch: master

netlink: do not put IFA_BROADCAST for loopback address

This is a minor adjustment to make the OSv implementation match what Linux
does - skip the IFA_BROADCAST attribute for the loopback address in the NEWADDR
response.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>

---
diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -616,17 +616,23 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm)
in6_clearscope(&broadaddr.sin6_addr);
p_broadaddr = (struct bsd_sockaddr *)&broadaddr;
}
- if (nla_put_sockaddr(m, IFA_ADDRESS, p_addr) ||
- nla_put_sockaddr(m, IFA_BROADCAST, p_broadaddr)){
+ if (nla_put_sockaddr(m, IFA_ADDRESS, p_addr)){
+ error = ENOBUFS;
+ goto done;
+ }
+ if (!(ifm->ifa_flags & IFF_LOOPBACK) && nla_put_sockaddr(m, IFA_BROADCAST, p_broadaddr)){
error = ENOBUFS;
goto done;
}
}
else
#endif
{
- if (nla_put_sockaddr(m, IFA_ADDRESS, ifa->ifa_addr) ||
- nla_put_sockaddr(m, IFA_BROADCAST, ifa->ifa_broadaddr)){
+ if (nla_put_sockaddr(m, IFA_ADDRESS, ifa->ifa_addr)){
+ error = ENOBUFS;
+ goto done;
+ }
+ if (!(ifm->ifa_flags & IFF_LOOPBACK) && nla_put_sockaddr(m, IFA_BROADCAST, ifa->ifa_broadaddr)){

Commit Bot

unread,
Jun 13, 2022, 11:03:40 AM6/13/22
to osv...@googlegroups.com, Waldemar Kozaczuk
From: Waldemar Kozaczuk <jwkoz...@gmail.com>
Committer: Waldemar Kozaczuk <jwkoz...@gmail.com>
Branch: master

netlink: IFA_ADDRESS needs to go first

Golang uses the netlink interface RTM_GETADDR to query the network interfaces and IPs.
It assumes that the 1st attribute in the RTM_NEWADDR response is IFA_ADDRESS. This
patch changes the order in which RTM_NEWADDR attributes are sent to make
sure the IFA_ADDRESS goes first and IFA_LABEL last.

This does not seem to be documented anywhere but Linux sends RTM_NEWADDR responses
with the IFA_ADDRESS attribute first so we follow suit.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>

---
diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -599,10 +599,6 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm)
ifm->ifa_prefixlen = get_sockaddr_mask_prefix_len(ifa->ifa_netmask);
ifm->ifa_flags = ifp->if_flags | ifp->if_drv_flags;
ifm->ifa_scope = 0; // FIXME:
- if (nla_put_string(m, IFA_LABEL, ifp->if_xname)) {
- error = ENOBUFS;
- goto done;
- }
#ifdef INET6
if (ifa->ifa_addr && ifa->ifa_addr->sa_family == AF_INET6){
// FreeBSD embeds the IPv6 scope ID in the IPv6 address
@@ -635,6 +631,10 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm)
goto done;
}
}
+ if (nla_put_string(m, IFA_LABEL, ifp->if_xname)) {
+ error = ENOBUFS;
+ goto done;
+ }
nlmsg_end(m, nlh);
}

Commit Bot

unread,
Jun 13, 2022, 11:03:42 AM6/13/22
to osv...@googlegroups.com, Waldemar Kozaczuk
From: Waldemar Kozaczuk <jwkoz...@gmail.com>
Committer: Waldemar Kozaczuk <jwkoz...@gmail.com>
Branch: master

netlink: stash nl_pid into netlinkcb

This enhances netlink_attach() and netlink_bind() to capture the source nl_pid,
or generate one if it is 0, and save it in the control block so that it can be
fetched later when necessary. This will be useful in the next patch.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>

---
diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -61,6 +61,14 @@ struct bsd_sockaddr_nl {
uint32_t nl_groups; /* Multicast groups mask */
};

+struct netlinkcb {
+ struct rawcb raw;
+ pid_t nl_pid;
+};
+
+std::atomic<pid_t> _nl_next_gen_pid(2);
+
+
MALLOC_DEFINE(M_NETLINK, "netlink", "netlink socket");

static struct bsd_sockaddr netlink_src = { 2, PF_NETLINK, };
@@ -311,16 +319,18 @@ netlink_close(struct socket *so)
static int
netlink_attach(struct socket *so, int proto, struct thread *td)
{
+ struct netlinkcb *ncb;
struct rawcb *rp;
int s, error;

KASSERT(so->so_pcb == NULL, ("netlink_attach: so_pcb != NULL"));

/* XXX */
- rp = (rawcb *)malloc(sizeof *rp);
- if (rp == NULL)
+ ncb = (netlinkcb *)malloc(sizeof *ncb);
+ if (ncb == NULL)
return ENOBUFS;
- bzero(rp, sizeof *rp);
+ bzero(ncb, sizeof *ncb);
+ rp = &ncb->raw;

/*
* The splnet() is necessary to block protocols from sending
@@ -362,7 +372,14 @@ netlink_bind(struct socket *so, struct bsd_sockaddr *nam, struct thread *td)
__FILE__, __LINE__, __FUNCTION__, nam->sa_len, sizeof(struct bsd_sockaddr_nl));
return EINVAL;
}
- // TODO: stash the nl_pid somewhere
+ auto *ncb = reinterpret_cast<netlinkcb*>(rp);
+ bsd_sockaddr_nl *nl_sock_addr = (bsd_sockaddr_nl*)nam;
+ if (nl_sock_addr->nl_pid == 0) { // kernel needs to assign pid
+ auto assigned_pid = _nl_next_gen_pid.fetch_add(1, std::memory_order_relaxed);
+ ncb->nl_pid = assigned_pid;
+ } else {
+ ncb->nl_pid = nl_sock_addr->nl_pid;
+ }
return 0;
}

Commit Bot

unread,
Jun 13, 2022, 11:03:43 AM6/13/22
to osv...@googlegroups.com, Waldemar Kozaczuk
From: Waldemar Kozaczuk <jwkoz...@gmail.com>
Committer: Waldemar Kozaczuk <jwkoz...@gmail.com>
Branch: master

netlink: return stashed pid
Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>

---
diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -436,10 +436,27 @@ netlink_shutdown(struct socket *so)
return (raw_usrreqs.pru_shutdown(so));
}

+static pid_t
+get_socket_pid(struct socket *so)
+{
+ struct rawcb *rp = sotorawcb(so);
+ struct netlinkcb *ncb = (netlinkcb *)rp;
+ return ncb->nl_pid;
+}
+
static int
netlink_sockaddr(struct socket *so, struct bsd_sockaddr **nam)
{
- return (raw_usrreqs.pru_sockaddr(so, nam));
+ struct bsd_sockaddr_nl *sin;
+
+ sin = (bsd_sockaddr_nl*)malloc(sizeof *sin);
+ bzero(sin, sizeof *sin);
+ sin->nl_family = AF_NETLINK;
+ sin->nl_len = sizeof(*sin);
+ sin->nl_pid = get_socket_pid(so);
+
+ *nam = (bsd_sockaddr*)sin;
+ return 0;
}

static struct pr_usrreqs netlink_usrreqs = initialize_with([] (pr_usrreqs& x) {
@@ -474,7 +491,7 @@ netlink_senderr(struct socket *so, struct nlmsghdr *nlm, int error)
}

if ((hdr = (struct nlmsghdr *)nlmsg_put(m,
- nlm ? nlm->nlmsg_pid : 0,
+ get_socket_pid(so),
nlm ? nlm->nlmsg_seq : 0,
NLMSG_ERROR, sizeof(*err),
nlm ? nlm->nlmsg_flags : 0)) == NULL) {
@@ -513,7 +530,7 @@ netlink_process_getlink_msg(struct socket *so, struct nlmsghdr *nlm)
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
IF_ADDR_RLOCK(ifp);

- nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWLINK, sizeof(*ifm), nlm->nlmsg_flags);
+ nlh = nlmsg_begin(m, get_socket_pid(so), nlm->nlmsg_seq, LINUX_RTM_NEWLINK, sizeof(*ifm), nlm->nlmsg_flags);
if (!nlh) {
error = ENOBUFS;
goto done;
@@ -547,7 +564,7 @@ netlink_process_getlink_msg(struct socket *so, struct nlmsghdr *nlm)
IF_ADDR_RUNLOCK(ifp);
nlmsg_end(m, nlh);
}
- nlh = nlmsg_put(m, nlm->nlmsg_pid, nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags);
+ nlh = nlmsg_put(m, get_socket_pid(so), nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags);

done:
if (ifp != NULL)
@@ -605,7 +622,7 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm)
if (!ifa->ifa_addr)
continue;

- nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWADDR, sizeof(*ifm), nlm->nlmsg_flags);
+ nlh = nlmsg_begin(m, get_socket_pid(so), nlm->nlmsg_seq, LINUX_RTM_NEWADDR, sizeof(*ifm), nlm->nlmsg_flags);
if (!nlh) {
error = ENOBUFS;
goto done;
@@ -663,7 +680,7 @@ netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm)

IF_ADDR_RUNLOCK(ifp);
}
- nlh = nlmsg_put(m, nlm->nlmsg_pid, nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags);
+ nlh = nlmsg_put(m, get_socket_pid(so), nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags);
done:
if (ifp != NULL)
IF_ADDR_RUNLOCK(ifp);
@@ -728,7 +745,7 @@ struct netlink_getneigh_lle_cbdata {
};

static int
-netlink_getneigh_lle_cb(struct lltable *llt, struct llentry *lle, void *data)
+netlink_getneigh_lle_cb(struct socket *so, struct lltable *llt, struct llentry *lle, void *data)
{
struct netlink_getneigh_lle_cbdata *cbdata = (struct netlink_getneigh_lle_cbdata *) data;
int ndm_family = netlink_bsd_to_linux_family(llt->llt_af);
@@ -743,7 +760,7 @@ netlink_getneigh_lle_cb(struct lltable *llt, struct llentry *lle, void *data)
struct nlmsghdr *nlm = cbdata->nlm;
struct mbuf *m = cbdata->m;
struct ndmsg *ndm;
- struct nlmsghdr *nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWNEIGH, sizeof(*ndm), nlm->nlmsg_flags);
+ struct nlmsghdr *nlh = nlmsg_begin(m, get_socket_pid(so), nlm->nlmsg_seq, LINUX_RTM_NEWNEIGH, sizeof(*ndm), nlm->nlmsg_flags);

if (!nlh) {
return ENOBUFS;
@@ -788,7 +805,7 @@ netlink_getneigh_lle_cb(struct lltable *llt, struct llentry *lle, void *data)


static int
-netlink_getneigh_lltable_cb(struct lltable *llt, void *cbdata)
+netlink_getneigh_lltable_cb(struct socket *so, struct lltable *llt, void *cbdata)
{
struct netlink_getneigh_lle_cbdata *data = (struct netlink_getneigh_lle_cbdata *) cbdata;
int error = 0;
@@ -799,7 +816,7 @@ netlink_getneigh_lltable_cb(struct lltable *llt, void *cbdata)
return 0;

IF_AFDATA_RLOCK(llt->llt_ifp);
- error = lltable_foreach_lle(llt, netlink_getneigh_lle_cb, data);
+ error = lltable_foreach_lle(so, llt, netlink_getneigh_lle_cb, data);
IF_AFDATA_RUNLOCK(llt->llt_ifp);

return error;
@@ -829,10 +846,10 @@ netlink_process_getneigh_msg(struct socket *so, struct nlmsghdr *nlm)
cbdata.family = ndm->ndm_family;
cbdata.state = ndm->ndm_state;

- error = lltable_foreach(netlink_getneigh_lltable_cb, &cbdata);
+ error = lltable_foreach(so, netlink_getneigh_lltable_cb, &cbdata);

if (!error) {
- nlh = nlmsg_put(m, nlm->nlmsg_pid, nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags);
+ nlh = nlmsg_put(m, get_socket_pid(so), nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags);
netlink_dispatch(so, m);
} else {
m_free(m);
diff --git a/bsd/sys/net/if_llatbl.cc b/bsd/sys/net/if_llatbl.cc
--- a/bsd/sys/net/if_llatbl.cc
+++ b/bsd/sys/net/if_llatbl.cc
@@ -501,14 +501,14 @@ DB_SHOW_ALL_COMMAND(lltables, db_show_all_lltables)
/*
* Iterate over all lltables
*/
-int lltable_foreach(int (*func)(struct lltable *llt, void *cbdata), void *cbdata)
+int lltable_foreach(struct socket *so, int (*func)(struct socket *so, struct lltable *llt, void *cbdata), void *cbdata)
{
struct lltable *llt;
int error = 0;

LLTABLE_RLOCK();
SLIST_FOREACH(llt, &V_lltables, llt_link) {
- if ((error = func(llt, cbdata)) != 0)
+ if ((error = func(so, llt, cbdata)) != 0)
break;
}
LLTABLE_RUNLOCK();
@@ -519,7 +519,7 @@ int lltable_foreach(int (*func)(struct lltable *llt, void *cbdata), void *cbdata
/*
* Iterate over all llentries in the lltable
*/
-int lltable_foreach_lle(struct lltable *llt, int (*func)(struct lltable *llt, struct llentry *lle, void *cbdata), void *cbdata)
+int lltable_foreach_lle(struct socket *so, struct lltable *llt, int (*func)(struct socket *so, struct lltable *llt, struct llentry *lle, void *cbdata), void *cbdata)
{
struct llentry *lle;
int i;
@@ -530,7 +530,7 @@ int lltable_foreach_lle(struct lltable *llt, int (*func)(struct lltable *llt, st
/* skip deleted entries */
if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
continue;
- if ((error = func(llt, lle, cbdata)) != 0)
+ if ((error = func(so, llt, lle, cbdata)) != 0)
break;
}
}
diff --git a/bsd/sys/net/if_llatbl.h b/bsd/sys/net/if_llatbl.h
--- a/bsd/sys/net/if_llatbl.h
+++ b/bsd/sys/net/if_llatbl.h
@@ -201,12 +201,12 @@ struct llentry *llentry_alloc(struct ifnet *, struct lltable *,
/*
* Iterate over all lltables
*/
-int lltable_foreach(int (*func)(struct lltable *llt, void *cbdata), void *cbdata);
+int lltable_foreach(struct socket *so, int (*func)(struct socket *so, struct lltable *llt, void *cbdata), void *cbdata);

/*
* Iterate over all llentries in the lltable
*/
-int lltable_foreach_lle(struct lltable *llt, int (*func)(struct lltable *llt, struct llentry *lle, void *cbdata), void *cbdata);
+int lltable_foreach_lle(struct socket *so, struct lltable *llt, int (*func)(struct socket *so, struct lltable *llt, struct llentry *lle, void *cbdata), void *cbdata);

__END_DECLS

Commit Bot

unread,
Jun 13, 2022, 11:03:45 AM6/13/22
to osv...@googlegroups.com, Waldemar Kozaczuk
From: Waldemar Kozaczuk <jwkoz...@gmail.com>
Committer: Waldemar Kozaczuk <jwkoz...@gmail.com>
Branch: master

netlink: fix error handling

Fix netlink_process_msg() to propagate a potential error
from netlink_senderr(). Normally netlink_senderr() should return
0, indicating that the error response was built successfully.
This patch tweaks the logic to make sure that in such a case the
error response is sent back as an NLMSG_ERROR reply instead of
making sendmsg() return an error.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>

---
diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc

Commit Bot

unread,
Jun 13, 2022, 11:03:46 AM6/13/22
to osv...@googlegroups.com, Waldemar Kozaczuk
From: Waldemar Kozaczuk <jwkoz...@gmail.com>
Committer: Waldemar Kozaczuk <jwkoz...@gmail.com>
Branch: master

netlink: made some functions static

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>

---
diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -118,6 +118,7 @@ static int get_sockaddr_mask_prefix_len(struct bsd_sockaddr *sa)
}


+static
void *nl_m_put(struct mbuf *m0, int len)
{
struct mbuf *m, *n;
@@ -151,6 +152,7 @@ void *nl_m_put(struct mbuf *m0, int len)
return data;
}

+static
struct nlmsghdr * nlmsg_put(struct mbuf *m, uint32_t pid, uint32_t seq, int type, int len, int flags)
{
struct nlmsghdr *nlh;
@@ -170,16 +172,19 @@ struct nlmsghdr * nlmsg_put(struct mbuf *m, uint32_t pid, uint32_t seq, int type
return nlh;
}

+static
struct nlmsghdr * nlmsg_begin(struct mbuf *m, uint32_t pid, uint32_t seq, int type, int len, int flags)
{
return nlmsg_put(m, pid, seq, type, len, flags);
}

+static
void nlmsg_end(struct mbuf *m, struct nlmsghdr *nlh)
{
nlh->nlmsg_len = m->M_dat.MH.MH_pkthdr.len - ((uintptr_t)nlh - (uintptr_t)m->m_hdr.mh_data);
}

+static
int nla_put(struct mbuf *m, int attrtype, int len, const void *src)
{
struct nlattr *nla;
@@ -198,16 +203,18 @@ int nla_put(struct mbuf *m, int attrtype, int len, const void *src)
}

template<class T>
-int nla_put_type(struct mbuf *m, int attrtype, T val)
+static int nla_put_type(struct mbuf *m, int attrtype, T val)
{
return nla_put(m, attrtype, sizeof(val), &val);
}

+static
int nla_put_string(struct mbuf *m, int attrtype, const char *str)
{
return nla_put(m, attrtype, strlen(str) + 1, str);
}

+static
int nla_put_sockaddr(struct mbuf *m, int attrtype, struct bsd_sockaddr *sa)
{
void *data;

Commit Bot

unread,
Jun 13, 2022, 11:03:47 AM6/13/22
to osv...@googlegroups.com, Waldemar Kozaczuk
From: Waldemar Kozaczuk <jwkoz...@gmail.com>
Committer: Waldemar Kozaczuk <jwkoz...@gmail.com>
Branch: master

netlink: set negative errno in error responses

The netlink specification requires that the error field contain the
negated errno value.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>

---
diff --git a/bsd/sys/compat/linux/linux_netlink.cc b/bsd/sys/compat/linux/linux_netlink.cc
--- a/bsd/sys/compat/linux/linux_netlink.cc
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -506,7 +506,7 @@ netlink_senderr(struct socket *so, struct nlmsghdr *nlm, int error)
return ENOBUFS;
}
err = (struct nlmsgerr *) nlmsg_data(hdr);

Commit Bot

unread,
Jun 13, 2022, 11:03:48 AM6/13/22
to osv...@googlegroups.com, Waldemar Kozaczuk
From: Waldemar Kozaczuk <jwkoz...@gmail.com>
Committer: Waldemar Kozaczuk <jwkoz...@gmail.com>
Branch: master

netlink: enable it and add unit test

This changes bsd/net.cc to enable netlink by registering the netlink
domain and calling netlink_init().

It also adds a unit test to verify the netlink implementation.

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>

---
diff --git a/bsd/net.cc b/bsd/net.cc
--- a/modules/tests/Makefile
+++ b/modules/tests/Makefile
@@ -133,7 +133,8 @@ tests := tst-pthread.so misc-ramdisk.so tst-vblk.so tst-bsd-evh.so \
tst-getopt.so tst-getopt-pie.so tst-non-pie.so tst-semaphore.so \
tst-elf-init.so tst-realloc.so tst-setjmp.so \
libtls.so libtls_gold.so tst-tls.so tst-tls-gold.so tst-tls-pie.so \
- tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so
+ tst-sigaction.so tst-syscall.so tst-ifaddrs.so tst-getdents.so \
+ tst-netlink.so
# libstatic-thread-variable.so tst-static-thread-variable.so \

#TODO For now let us disable these tests for aarch64 until
diff --git a/tests/tst-netlink.c b/tests/tst-netlink.c
--- a/tests/tst-netlink.c
+ break;
+ default:
+ called_response_handler = 1;
+ handle_response(rsp);
+ break;
+ }
+ }
+ }
+
+ if (close(s)) {
+ die("close FAILED");
+ };
+ return 0;
+}
+
+ break;
+ }
+ }
+}
+
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+ break;
+ }
+ }
+}
+
+ struct nlmsgerr *err;
+ struct nlmsgerr *err;
Reply all
Reply to author
Forward
0 new messages