Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

mcl2k2 mbuf clusters

6 views
Skip to first unread message

David Gwynne

unread,
Oct 14, 2016, 1:48:56 AM10/14/16
to te...@openbsd.org
this adds a pool backend for MCLGETI that's 2k+2 bytes in size, which
can be used on some very common nics that have annoying constraints
on their rx descriptors.

this in turn simplifies the code in those drivers and lets them
always operate on ETHER_ALIGN boundaries.

the pool is cheap, pages will only be allocated in it if something
asks for them, and it keeps this complexity out of the drivers.

ok?

Index: net/if.h
===================================================================
RCS file: /cvs/src/sys/net/if.h,v
retrieving revision 1.179
diff -u -p -r1.179 if.h
--- net/if.h 4 Sep 2016 15:10:59 -0000 1.179
+++ net/if.h 14 Oct 2016 03:46:22 -0000
@@ -68,7 +68,7 @@ struct if_clonereq {
char *ifcr_buffer; /* buffer for cloner names */
};

-#define MCLPOOLS 7 /* number of cluster pools */
+#define MCLPOOLS 8 /* number of cluster pools */

struct if_rxring {
int rxr_adjusted;
Index: kern/uipc_mbuf.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.233
diff -u -p -r1.233 uipc_mbuf.c
--- kern/uipc_mbuf.c 10 Oct 2016 00:41:17 -0000 1.233
+++ kern/uipc_mbuf.c 14 Oct 2016 03:46:22 -0000
@@ -107,6 +107,7 @@ struct pool mtagpool;
/* mbuf cluster pools */
u_int mclsizes[MCLPOOLS] = {
MCLBYTES, /* must be at slot 0 */
+ MCLBYTES + 2, /* ETHER_ALIGNED 2k mbufs */
4 * 1024,
8 * 1024,
9 * 1024,
@@ -142,6 +143,7 @@ void
mbinit(void)
{
int i;
+ unsigned int lowbits;

#if DIAGNOSTIC
if (mclsizes[0] != MCLBYTES)
@@ -158,9 +160,15 @@ mbinit(void)
IPL_NET, 0, "mtagpl", NULL);

for (i = 0; i < nitems(mclsizes); i++) {
- snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
- mclsizes[i] >> 10);
- pool_init(&mclpools[i], mclsizes[i], 0, IPL_NET, 0,
+ lowbits = mclsizes[i] & ((1 << 10) - 1);
+ if (lowbits) {
+ snprintf(mclnames[i], sizeof(mclnames[0]),
+ "mcl%dk%u", mclsizes[i] >> 10, lowbits);
+ } else {
+ snprintf(mclnames[i], sizeof(mclnames[0]), "mcl%dk",
+ mclsizes[i] >> 10);
+ }
+ pool_init(&mclpools[i], mclsizes[i], 64, IPL_NET, 0,
mclnames[i], NULL);
pool_set_constraints(&mclpools[i], &kp_dma_contig);
pool_setlowat(&mclpools[i], mcllowat);
Index: dev/pci/if_em.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.c,v
retrieving revision 1.331
diff -u -p -r1.331 if_em.c
--- dev/pci/if_em.c 13 Apr 2016 10:34:32 -0000 1.331
+++ dev/pci/if_em.c 14 Oct 2016 03:47:21 -0000
@@ -2450,9 +2450,7 @@ em_get_buf(struct em_softc *sc, int i)
return (ENOBUFS);
}
m->m_len = m->m_pkthdr.len = EM_MCLBYTES;
-#ifdef __STRICT_ALIGNMENT
m_adj(m, ETHER_ALIGN);
-#endif

error = bus_dmamap_load_mbuf(sc->sc_dmat, pkt->pkt_map,
m, BUS_DMA_NOWAIT);
Index: dev/pci/if_ix.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
retrieving revision 1.132
diff -u -p -r1.132 if_ix.c
--- dev/pci/if_ix.c 13 Apr 2016 10:34:32 -0000 1.132
+++ dev/pci/if_ix.c 14 Oct 2016 03:47:21 -0000
@@ -616,11 +616,7 @@ ixgbe_init(void *arg)
ixgbe_initialize_transmit_units(sc);

/* Use 2k clusters, even for jumbo frames */
-#ifdef __STRICT_ALIGNMENT
sc->rx_mbuf_sz = MCLBYTES + ETHER_ALIGN;
-#else
- sc->rx_mbuf_sz = MCLBYTES;
-#endif

/* Prepare receive descriptors and buffers */
if (ixgbe_setup_receive_structures(sc)) {
@@ -2458,9 +2454,7 @@ ixgbe_get_buf(struct rx_ring *rxr, int i
return (ENOBUFS);

mp->m_len = mp->m_pkthdr.len = sc->rx_mbuf_sz;
-#ifdef __STRICT_ALIGNMENT
m_adj(mp, ETHER_ALIGN);
-#endif

error = bus_dmamap_load_mbuf(rxr->rxdma.dma_tag, rxbuf->map,
mp, BUS_DMA_NOWAIT);
@@ -2667,11 +2661,7 @@ ixgbe_initialize_receive_units(struct ix
hlreg |= IXGBE_HLREG0_JUMBOEN;
IXGBE_WRITE_REG(&sc->hw, IXGBE_HLREG0, hlreg);

-#ifdef __STRICT_ALIGNMENT
bufsz = (sc->rx_mbuf_sz - ETHER_ALIGN) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
-#else
- bufsz = sc->rx_mbuf_sz >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
-#endif

for (i = 0; i < sc->num_queues; i++, rxr++) {
uint64_t rdba = rxr->rxdma.dma_map->dm_segs[0].ds_addr;
Index: dev/pci/if_em.h
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_em.h,v
retrieving revision 1.72
diff -u -p -r1.72 if_em.h
--- dev/pci/if_em.h 18 Feb 2016 14:24:39 -0000 1.72
+++ dev/pci/if_em.h 14 Oct 2016 03:48:32 -0000
@@ -263,11 +263,7 @@ typedef int boolean_t;
#define EM_RXBUFFER_8192 8192
#define EM_RXBUFFER_16384 16384

-#ifdef __STRICT_ALIGNMENT
#define EM_MCLBYTES (EM_RXBUFFER_2048 + ETHER_ALIGN)
-#else
-#define EM_MCLBYTES EM_RXBUFFER_2048
-#endif

#define EM_MAX_SCATTER 64
#define EM_TSO_SIZE 65535

Claudio Jeker

unread,
Oct 14, 2016, 1:58:33 AM10/14/16
to te...@openbsd.org
It is time to put the nasty comment from rl(4) into em(4) and ix(4).
Everybody knew how bad Realtek was but thinks Intel NICs are good. The
truth is that modern Intel NICs are as bad as the cheapest and crappiest
10/100 Mbps Ethernet chips from the last millennium.

--
:wq Claudio

Ted Unangst

unread,
Oct 14, 2016, 2:18:12 AM10/14/16
to David Gwynne, te...@openbsd.org
David Gwynne wrote:
> this adds a pool backend for MCLGETI thats 2k+2 bytes in size, which
> can be used on some very common nics that have annoying constraints
> on their rx descriptors.
>
> this in turn simplifies the code in those drivers and lets them
> always operate on ETHER_ALIGN boundaries.
>
> the pool is cheap, pages will only be allocated in it if something
> asks for them, and it keeps this complexity out of the drivers.

pool is efficient, but it can't work miracles. on archs like amd64, this is
going to only pack 15 clusters in a 32k "page" instead of 16.

if i understand correctly, currently em is using 4k clusters on strict
alignment archs? i think it makes a lot of sense to add a 2k2 pool because
that means using less memory. but archs without strict alignment shouldn't
be forced to use more memory. that's a regression.

David Gwynne

unread,
Oct 14, 2016, 2:32:40 AM10/14/16
to Ted Unangst, te...@openbsd.org

> On 14 Oct 2016, at 16:17, Ted Unangst <te...@tedunangst.com> wrote:
>
> David Gwynne wrote:
>> this adds a pool backend for MCLGETI thats 2k+2 bytes in size, which
>> can be used on some very common nics that have annoying constraints
>> on their rx descriptors.
>>
>> this in turn simplifies the code in those drivers and lets them
>> always operate on ETHER_ALIGN boundaries.
>>
>> the pool is cheap, pages will only be allocated in it if something
>> asks for them, and it keeps this complexity out of the drivers.
>
> pool is efficient, but it can't work miracles. on archs like amd64, this is
> going to only pack 15 clusters in a 32k "page" instead of 16.

it is only about 70 bytes per cluster though. i could raise the cluster size by that amount to avoid slack space if you want, or we can let the page colouring have some fun here.

>
> if i understand correctly, currently em is using 4k clusters on strict
> alignment? i think it makes a lot of sense to add a 2k2 pool because that
> means using less memory. but not strict alignment shouldn't be forced to use
> more memory. that's a regression.

you could also argue that 2k clusters waste ~500 bytes in the most common case since the vast majority of large packets are still only 1514 bytes. we could pack 10 1600 byte clusters into 16k instead of the 8 we do now.

more seriously though, i'm happy to burn a bit more memory to keep the code simple, and overall our memory consumption by network cards is much lower than other platforms because of the rx ring moderation we do.

dlg

Mike Belopuhov

unread,
Oct 14, 2016, 6:41:05 AM10/14/16
to Claudio Jeker, te...@openbsd.org
On Fri, Oct 14, 2016 at 07:58 +0200, Claudio Jeker wrote:
> It is time to put the nasty comment from rl(4) into em(4) and ix(4).
> Everybody knew how bad realtek was but thinks Intel nics are good. The
> truth is that modern Intel nic are as bad as the cheepest and crapiest
> 10/100 Mbps Ethernet chips from the last millenium.
>
> --
> :wq Claudio
>

Then don't use them. Vanity won't fix anything (if there's anything
to fix at all).

Mike Belopuhov

unread,
Oct 14, 2016, 6:45:55 AM10/14/16
to David Gwynne, te...@openbsd.org
On Fri, Oct 14, 2016 at 15:48 +1000, David Gwynne wrote:
> this adds a pool backend for MCLGETI thats 2k+2 bytes in size, which
> can be used on some very common nics that have annoying constraints
> on their rx descriptors.
>
> this in turn simplifies the code in those drivers and lets them
> always operate on ETHER_ALIGN boundaries.
>

I would agree to using it under the __STRICT_ALIGNMENT ifdef instead
of wasting a page, but not in the generic case. There is no technical
reason to do it. Code simplifications brought by this change are minuscule.
In fact it adds more code to uipc_mbuf.c than it saves in the drivers.

> the pool is cheap, pages will only be allocated in it if something
> asks for them, and it keeps this complexity out of the drivers.
>
> ok?
>

Not OK. Not w/o *at least* a performance assessment of this change.
Right now it's just a distraction from more important issues.

Mike Belopuhov

unread,
Oct 14, 2016, 6:58:53 AM10/14/16
to Claudio Jeker, tech
I don't mean to offend you personally. It's directed at all of us,
me included. I apologize if that sounded too harsh.

0 new messages