[COMMIT osv master] ena: adapt data path code to OSv


Commit Bot

Jan 11, 2024, 12:43:14 PM
to osv...@googlegroups.com, Waldemar Kozaczuk
From: Waldemar Kozaczuk <jwkoz...@gmail.com>
Committer: WALDEMAR KOZACZUK <jwkoz...@gmail.com>
Branch: master

ena: adapt data path code to OSv

This patch adapts the middle layer of the data path handling logic
to make it work in OSv. For more details, please see
https://github.com/amzn/amzn-drivers/tree/master/kernel/fbsd/ena#data-path-interface
and https://github.com/amzn/amzn-drivers/tree/master/kernel/fbsd/ena#data-path.

At a high level, the main entry point for the RX part is ena_cleanup(), which
delegates to ena_rx_cleanup() and eventually ends up calling the net
channel ifp->if_classifier.post_packet() (fast path) or ifp->if_input()
(slow path). ena_cleanup() is called by the cleanup_work thread, which
is woken every time the MSI-X vector for the given TX/RX queue fires.
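
Below is a minimal, hedged sketch (not part of this diff; the real thread
setup lives outside ena_datapath.cc) of how such a cleanup_work thread can
pair with the wake_with() calls in this patch via OSv's
sched::thread::wait_until():

    // Sketch only, assuming ena.h and <osv/sched.hh> are included and that
    // cleanup_thread/cleanup_pending are the ena_que fields this patch's
    // wake_with() lambdas increment.
    static void cleanup_work(struct ena_que *que)
    {
        while (true) {
            // Sleep until the MSI-X handler (or ena_start_xmit() below)
            // runs cleanup_thread->wake_with([que] { que->cleanup_pending++; })
            sched::thread::wait_until([que] { return que->cleanup_pending > 0; });
            que->cleanup_pending = 0;
            ena_cleanup(que);
        }
    }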

Similarly, the main entry points for the TX part are ena_mq_start(), which
is what ifp->if_transmit is set to, and ena_deferred_mq_start(), which is
called by the enqueue_work thread that is woken in ena_mq_start() and
ena_tx_cleanup() (the other part of the ena_cleanup() routine).
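
Again as a hedged sketch (the actual enqueue_work body lives outside this
file), the TX side can be pictured the same way:

    // Sketch only: enqueue_thread/enqueue_pending are the ena_ring fields
    // that ena_mq_start() and ena_tx_cleanup() wake via wake_with().
    static void enqueue_work(struct ena_ring *tx_ring)
    {
        while (true) {
            sched::thread::wait_until([tx_ring] { return tx_ring->enqueue_pending > 0; });
            tx_ring->enqueue_pending = 0;
            ena_deferred_mq_start(tx_ring);
        }
    }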

Finally, ena_qflush() is what ifp->if_qflush is set to.

The particular code changes to ena_datapath.* involve the following:

- implement critical_enter()/critical_exit() used by buf_ring (see
https://man.freebsd.org/cgi/man.cgi?query=critical_enter)

- for now, remove the RSS- and DEV_NETMAP-related code

- replace the drbr_* functions with their buf_ring_* equivalents (see the
mapping after this list)

- replace taskqueue_enqueue() with OSv's wake_with() (see the thread sketches
above)

- adapt references to the mbuf fields to match the OSv version of the struct
(please see https://github.com/freebsd/freebsd-src/commit/3d1a9ed34e10dc6c5b6d86617464534ac9fc928a,
the upstream commit that changed the layout of the mbuf struct a bit; the
resulting field mapping is summarized after this list)

- simplify ena_tx_map_mbuf(), given we hard-code the ENA_ADMIN_PLACEMENT_POLICY_HOST
TX queue type and do not use the bus_dma API (see
https://man.freebsd.org/cgi/man.cgi?query=bus_dma)
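
In code terms, the buf_ring and mbuf substitutions listed above boil down
to the following mapping (each pair is visible in the diff below):

    /* drbr_* wrappers -> single-consumer buf_ring_* calls */
    drbr_empty(ifp, br)       ->  buf_ring_empty(br)
    drbr_enqueue(ifp, br, m)  ->  buf_ring_enqueue(br, m)
    drbr_peek(ifp, br)        ->  buf_ring_peek_clear_sc(br)
    drbr_putback(ifp, br, m)  ->  buf_ring_putback_sc(br, m)
    drbr_advance(ifp, br)     ->  buf_ring_advance_sc(br)
    drbr_flush(ifp, br)       ->  buf_ring_dequeue_sc(br) loop + m_freem(m)

    /* FreeBSD mbuf accessors -> OSv's older struct mbuf spelling */
    m->m_flags       ->  m->m_hdr.mh_flags
    m->m_len         ->  m->m_hdr.mh_len
    m->m_data        ->  m->m_hdr.mh_data
    m->m_next        ->  m->m_hdr.mh_next
    m->m_pkthdr.len  ->  m->M_dat.MH.MH_pkthdr.len (likewise for the other
                         pkthdr fields)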

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>

---
diff --git a/bsd/sys/dev/ena/ena_datapath.cc b/bsd/sys/dev/ena/ena_datapath.cc
--- a/bsd/sys/dev/ena/ena_datapath.cc
+++ b/bsd/sys/dev/ena/ena_datapath.cc
@@ -30,16 +30,20 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

+//#define ENA_LOG_ENABLE 1
+//#define ENA_LOG_IO_ENABLE 1
+
#include "ena.h"
#include "ena_datapath.h"
-#ifdef DEV_NETMAP
-#include "ena_netmap.h"
-#endif /* DEV_NETMAP */
-#ifdef RSS
-#include <net/rss_config.h>
-#endif /* RSS */

-#include <netinet6/ip6_var.h>
+#include <osv/sched.hh>
+
+static inline void critical_enter() { sched::preempt_disable(); }
+static inline void critical_exit() { sched::preempt_enable(); }
+
+#include <sys/buf_ring.h>
+
+//#include <netinet6/ip6_var.h>

/*********************************************************************
* Static functions prototypes
@@ -49,8 +53,6 @@ static int ena_tx_cleanup(struct ena_ring *);
static int ena_rx_cleanup(struct ena_ring *);
static inline int ena_get_tx_req_id(struct ena_ring *tx_ring,
struct ena_com_io_cq *io_cq, uint16_t *req_id);
-static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
- struct mbuf *);
static struct mbuf *ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
struct ena_com_rx_ctx *, uint16_t *);
static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
@@ -66,9 +68,8 @@ static void ena_start_xmit(struct ena_ring *);
*********************************************************************/

void
-ena_cleanup(void *arg, int pending)
+ena_cleanup(struct ena_que *que)
{
- struct ena_que *que = arg;
struct ena_adapter *adapter = que->adapter;
if_t ifp = adapter->ifp;
struct ena_ring *tx_ring;
@@ -78,25 +79,25 @@ ena_cleanup(void *arg, int pending)
int qid, ena_qid;
int txc, rxc, i;

- if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
+ if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))
return;

- ena_log_io(adapter->pdev, DBG, "MSI-X TX/RX routine\n");
+ ena_log_io(adapter->pdev, INFO, "MSI-X TX/RX routine");

tx_ring = que->tx_ring;
rx_ring = que->rx_ring;
qid = que->id;
ena_qid = ENA_IO_TXQ_IDX(qid);
io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];

- atomic_store_8(&tx_ring->first_interrupt, 1);
- atomic_store_8(&rx_ring->first_interrupt, 1);
+ tx_ring->first_interrupt.store(1);
+ rx_ring->first_interrupt.store(1);

for (i = 0; i < ENA_CLEAN_BUDGET; ++i) {
rxc = ena_rx_cleanup(rx_ring);
txc = ena_tx_cleanup(tx_ring);

- if (unlikely((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
+ if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))
return;

if ((txc != ENA_TX_BUDGET) && (rxc != ENA_RX_BUDGET))
@@ -111,13 +112,12 @@ ena_cleanup(void *arg, int pending)
}

void
-ena_deferred_mq_start(void *arg, int pending)
+ena_deferred_mq_start(struct ena_ring *tx_ring)
{
- struct ena_ring *tx_ring = (struct ena_ring *)arg;
if_t ifp = tx_ring->adapter->ifp;

- while (!drbr_empty(ifp, tx_ring->br) && tx_ring->running &&
- (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0) {
+ while (!buf_ring_empty(tx_ring->br) && tx_ring->running &&
+ (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) {
ENA_RING_MTX_LOCK(tx_ring);
ena_start_xmit(tx_ring);
ENA_RING_MTX_UNLOCK(tx_ring);
@@ -127,15 +127,12 @@ ena_deferred_mq_start(void *arg, int pending)
int
ena_mq_start(if_t ifp, struct mbuf *m)
{
- struct ena_adapter *adapter = if_getsoftc(ifp);
+ struct ena_adapter *adapter = static_cast<ena_adapter *>(ifp->if_softc);
struct ena_ring *tx_ring;
- int ret, is_drbr_empty;
+ int ret, is_br_empty;
uint32_t i;
-#ifdef RSS
- uint32_t bucket_id;
-#endif

- if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
+ if (unlikely((adapter->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))
return (ENODEV);

/* Which queue to use */
@@ -145,31 +142,26 @@ ena_mq_start(if_t ifp, struct mbuf *m)
* It should improve performance.
*/
if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
-#ifdef RSS
- if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
- &bucket_id) == 0)
- i = bucket_id % adapter->num_io_queues;
- else
-#endif
- i = m->m_pkthdr.flowid % adapter->num_io_queues;
+ i = m->M_dat.MH.MH_pkthdr.flowid % adapter->num_io_queues;
} else {
- i = curcpu % adapter->num_io_queues;
+ i = sched::cpu::current()->id % adapter->num_io_queues;
}
tx_ring = &adapter->tx_ring[i];

- /* Check if drbr is empty before putting packet */
- is_drbr_empty = drbr_empty(ifp, tx_ring->br);
- ret = drbr_enqueue(ifp, tx_ring->br, m);
+ /* Check if br is empty before putting packet */
+ is_br_empty = buf_ring_empty(tx_ring->br);
+ ret = buf_ring_enqueue(tx_ring->br, m);
if (unlikely(ret != 0)) {
- taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
+ m_freem(m);
+ tx_ring->enqueue_thread->wake_with([tx_ring] { tx_ring->enqueue_pending++; });
return (ret);
}

- if (is_drbr_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) {
+ if (is_br_empty && (ENA_RING_MTX_TRYLOCK(tx_ring) != 0)) {
ena_start_xmit(tx_ring);
ENA_RING_MTX_UNLOCK(tx_ring);
} else {
- taskqueue_enqueue(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
+ tx_ring->enqueue_thread->wake_with([tx_ring] { tx_ring->enqueue_pending++; });
}

return (0);
@@ -178,14 +170,16 @@ ena_mq_start(if_t ifp, struct mbuf *m)
void
ena_qflush(if_t ifp)
{
- struct ena_adapter *adapter = if_getsoftc(ifp);
+ struct ena_adapter *adapter = static_cast<ena_adapter *>(ifp->if_softc);
struct ena_ring *tx_ring = adapter->tx_ring;
int i;

for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring)
- if (!drbr_empty(ifp, tx_ring->br)) {
+ if (!buf_ring_empty(tx_ring->br)) {
ENA_RING_MTX_LOCK(tx_ring);
- drbr_flush(ifp, tx_ring->br);
+ struct mbuf *m;
+ while ((m = (struct mbuf *)buf_ring_dequeue_sc(tx_ring->br)) != NULL)
+ m_freem(m);
ENA_RING_MTX_UNLOCK(tx_ring);
}

@@ -208,7 +202,7 @@ ena_get_tx_req_id(struct ena_ring *tx_ring, struct ena_com_io_cq *io_cq,
return (EAGAIN);

if (unlikely(rc != 0)) {
- ena_log(adapter->pdev, ERR, "Invalid req_id %hu in qid %hu\n",
+ ena_log(adapter->pdev, ERR, "Invalid req_id %hu in qid %hu",
*req_id, tx_ring->qid);
counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
goto err;
@@ -218,7 +212,7 @@ ena_get_tx_req_id(struct ena_ring *tx_ring, struct ena_com_io_cq *io_cq,
return (0);

ena_log(adapter->pdev, ERR,
- "tx_info doesn't have valid mbuf. req_id %hu qid %hu\n",
+ "tx_info doesn't have valid mbuf. req_id %hu qid %hu",
*req_id, tx_ring->qid);
err:
ena_trigger_reset(adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
@@ -257,11 +251,6 @@ ena_tx_cleanup(struct ena_ring *tx_ring)
io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
next_to_clean = tx_ring->next_to_clean;

-#ifdef DEV_NETMAP
- if (netmap_tx_irq(adapter->ifp, tx_ring->qid) != NM_IRQ_PASS)
- return (0);
-#endif /* DEV_NETMAP */
-
do {
struct ena_tx_buffer *tx_info;
struct mbuf *mbuf;
@@ -275,13 +264,9 @@ ena_tx_cleanup(struct ena_ring *tx_ring)
mbuf = tx_info->mbuf;

tx_info->mbuf = NULL;
- bintime_clear(&tx_info->timestamp);
-
- bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
- BUS_DMASYNC_POSTWRITE);
- bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);
+ tx_info->timestamp = 0;

- ena_log_io(adapter->pdev, DBG, "tx: q %d mbuf %p completed\n",
+ ena_log_io(adapter->pdev, DBG, "tx: q %d mbuf %p completed",
tx_ring->qid, mbuf);

m_freem(mbuf);
@@ -306,7 +291,7 @@ ena_tx_cleanup(struct ena_ring *tx_ring)

work_done = ENA_TX_BUDGET - budget;

- ena_log_io(adapter->pdev, DBG, "tx: q %d done. total pkts: %d\n",
+ ena_log_io(adapter->pdev, DBG, "tx: q %d done. total pkts: %d",
tx_ring->qid, work_done);

/* If there is still something to commit update ring state */
@@ -332,82 +317,16 @@ ena_tx_cleanup(struct ena_ring *tx_ring)
if (!tx_ring->running && above_thresh) {
tx_ring->running = true;
counter_u64_add(tx_ring->tx_stats.queue_wakeup, 1);
- taskqueue_enqueue(tx_ring->enqueue_tq,
- &tx_ring->enqueue_task);
+ tx_ring->enqueue_thread->wake_with([tx_ring] { tx_ring->enqueue_pending++; });
}
ENA_RING_MTX_UNLOCK(tx_ring);
}

- tx_ring->tx_last_cleanup_ticks = ticks;
+ tx_ring->tx_last_cleanup_ticks = bsd_ticks;

return (work_done);
}

-static void
-ena_rx_hash_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
- struct mbuf *mbuf)
-{
- struct ena_adapter *adapter = rx_ring->adapter;
-
- if (likely(ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
- mbuf->m_pkthdr.flowid = ena_rx_ctx->hash;
-
-#ifdef RSS
- /*
- * Hardware and software RSS are in agreement only when both are
- * configured to Toeplitz algorithm. This driver configures
- * that algorithm only when software RSS is enabled and uses it.
- */
- if (adapter->ena_dev->rss.hash_func != ENA_ADMIN_TOEPLITZ &&
- ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN) {
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
- return;
- }
-#endif
-
- if (ena_rx_ctx->frag &&
- (ena_rx_ctx->l3_proto != ENA_ETH_IO_L3_PROTO_UNKNOWN)) {
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
- return;
- }
-
- switch (ena_rx_ctx->l3_proto) {
- case ENA_ETH_IO_L3_PROTO_IPV4:
- switch (ena_rx_ctx->l4_proto) {
- case ENA_ETH_IO_L4_PROTO_TCP:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
- break;
- case ENA_ETH_IO_L4_PROTO_UDP:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
- break;
- default:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
- }
- break;
- case ENA_ETH_IO_L3_PROTO_IPV6:
- switch (ena_rx_ctx->l4_proto) {
- case ENA_ETH_IO_L4_PROTO_TCP:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
- break;
- case ENA_ETH_IO_L4_PROTO_UDP:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
- break;
- default:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
- }
- break;
- case ENA_ETH_IO_L3_PROTO_UNKNOWN:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
- break;
- default:
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_OPAQUE_HASH);
- }
- } else {
- mbuf->m_pkthdr.flowid = rx_ring->qid;
- M_HASHTYPE_SET(mbuf, M_HASHTYPE_NONE);
- }
-}
-
/**
* ena_rx_mbuf - assemble mbuf from descriptors
* @rx_ring: ring for which we want to clean packets
@@ -423,44 +342,34 @@ ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
struct mbuf *mbuf;
struct ena_rx_buffer *rx_info;
struct ena_adapter *adapter;
- device_t pdev;
unsigned int descs = ena_rx_ctx->descs;
uint16_t ntc, len, req_id, buf = 0;

ntc = *next_to_clean;
adapter = rx_ring->adapter;
- pdev = adapter->pdev;

len = ena_bufs[buf].len;
req_id = ena_bufs[buf].req_id;
rx_info = &rx_ring->rx_buffer_info[req_id];
if (unlikely(rx_info->mbuf == NULL)) {
- ena_log(pdev, ERR, "NULL mbuf in rx_info");
+ ena_log(adapter->pdev, ERR, "NULL mbuf in rx_info");
return (NULL);
}

- ena_log_io(pdev, DBG, "rx_info %p, mbuf %p, paddr %jx\n", rx_info,
+ ena_log_io(adapter->pdev, DBG, "rx_info %p, mbuf %p, paddr %jx", rx_info,
rx_info->mbuf, (uintmax_t)rx_info->ena_buf.paddr);

- bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
- BUS_DMASYNC_POSTREAD);
mbuf = rx_info->mbuf;
- mbuf->m_flags |= M_PKTHDR;
- mbuf->m_pkthdr.len = len;
- mbuf->m_len = len;
+ mbuf->m_hdr.mh_flags |= M_PKTHDR;
+ mbuf->M_dat.MH.MH_pkthdr.len = len;
+ mbuf->m_hdr.mh_len = len;
/* Only for the first segment the data starts at specific offset */
- mbuf->m_data = mtodo(mbuf, ena_rx_ctx->pkt_offset);
- ena_log_io(pdev, DBG, "Mbuf data offset=%u\n", ena_rx_ctx->pkt_offset);
- mbuf->m_pkthdr.rcvif = rx_ring->que->adapter->ifp;
-
- /* Fill mbuf with hash key and it's interpretation for optimization */
- ena_rx_hash_mbuf(rx_ring, ena_rx_ctx, mbuf);
+ mbuf->m_hdr.mh_data = static_cast<caddr_t>(mtodo(mbuf, ena_rx_ctx->pkt_offset));
+ ena_log_io(adapter->pdev, DBG, "Mbuf data offset=%u", ena_rx_ctx->pkt_offset);
+ mbuf->M_dat.MH.MH_pkthdr.rcvif = rx_ring->que->adapter->ifp;

- ena_log_io(pdev, DBG, "rx mbuf 0x%p, flags=0x%x, len: %d\n", mbuf,
- mbuf->m_flags, mbuf->m_pkthdr.len);
-
- /* DMA address is not needed anymore, unmap it */
- bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
+ ena_log_io(adapter->pdev, DBG, "rx mbuf 0x%p, flags=0x%x, len: %d", mbuf,
+ mbuf->m_hdr.mh_flags, mbuf->M_dat.MH.MH_pkthdr.len);

rx_info->mbuf = NULL;
rx_ring->free_rx_ids[ntc] = req_id;
@@ -477,7 +386,7 @@ ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
rx_info = &rx_ring->rx_buffer_info[req_id];

if (unlikely(rx_info->mbuf == NULL)) {
- ena_log(pdev, ERR, "NULL mbuf in rx_info");
+ ena_log(adapter->pdev, ERR, "NULL mbuf in rx_info");
/*
* If one of the required mbufs was not allocated yet,
* we can break there.
@@ -492,19 +401,15 @@ ena_rx_mbuf(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs,
return (NULL);
}

- bus_dmamap_sync(adapter->rx_buf_tag, rx_info->map,
- BUS_DMASYNC_POSTREAD);
- if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_data) == 0)) {
+ if (unlikely(m_append(mbuf, len, rx_info->mbuf->m_hdr.mh_data) == 0)) {
counter_u64_add(rx_ring->rx_stats.mbuf_alloc_fail, 1);
- ena_log_io(pdev, WARN, "Failed to append Rx mbuf %p\n",
+ ena_log_io(adapter->pdev, WARN, "Failed to append Rx mbuf %p",
mbuf);
}

- ena_log_io(pdev, DBG, "rx mbuf updated. len %d\n",
- mbuf->m_pkthdr.len);
+ ena_log_io(adapter->pdev, DBG, "rx mbuf updated. len %d",
+ mbuf->M_dat.MH.MH_pkthdr.len);

- /* Free already appended mbuf, it won't be useful anymore */
- bus_dmamap_unload(rx_ring->adapter->rx_buf_tag, rx_info->map);
m_freem(rx_info->mbuf);
rx_info->mbuf = NULL;

@@ -524,15 +429,13 @@ static inline void
ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
struct mbuf *mbuf)
{
- device_t pdev = rx_ring->adapter->pdev;
-
/* if IP and error */
if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
ena_rx_ctx->l3_csum_err)) {
/* ipv4 checksum error */
- mbuf->m_pkthdr.csum_flags = 0;
+ mbuf->M_dat.MH.MH_pkthdr.csum_flags = 0;
counter_u64_add(rx_ring->rx_stats.csum_bad, 1);
- ena_log_io(pdev, DBG, "RX IPv4 header checksum error\n");
+ ena_log_io(rx_ring->adapter->pdev, DBG, "RX IPv4 header checksum error");
return;
}

@@ -541,12 +444,12 @@ ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx,
(ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)) {
if (ena_rx_ctx->l4_csum_err) {
/* TCP/UDP checksum error */
- mbuf->m_pkthdr.csum_flags = 0;
+ mbuf->M_dat.MH.MH_pkthdr.csum_flags = 0;
counter_u64_add(rx_ring->rx_stats.csum_bad, 1);
- ena_log_io(pdev, DBG, "RX L4 checksum error\n");
+ ena_log_io(rx_ring->adapter->pdev, DBG, "RX L4 checksum error");
} else {
- mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
- mbuf->m_pkthdr.csum_flags |= CSUM_IP_VALID;
+ mbuf->M_dat.MH.MH_pkthdr.csum_flags = CSUM_IP_CHECKED;
+ mbuf->M_dat.MH.MH_pkthdr.csum_flags |= CSUM_IP_VALID;
counter_u64_add(rx_ring->rx_stats.csum_good, 1);
}
}
@@ -560,7 +463,6 @@ static int
ena_rx_cleanup(struct ena_ring *rx_ring)
{
struct ena_adapter *adapter;
- device_t pdev;
struct mbuf *mbuf;
struct ena_com_rx_ctx ena_rx_ctx;
struct ena_com_io_cq *io_cq;
@@ -575,34 +477,23 @@ ena_rx_cleanup(struct ena_ring *rx_ring)
unsigned int qid;
int rc, i;
int budget = ENA_RX_BUDGET;
-#ifdef DEV_NETMAP
- int done;
-#endif /* DEV_NETMAP */

adapter = rx_ring->que->adapter;
- pdev = adapter->pdev;
ifp = adapter->ifp;
qid = rx_ring->que->id;
ena_qid = ENA_IO_RXQ_IDX(qid);
io_cq = &adapter->ena_dev->io_cq_queues[ena_qid];
io_sq = &adapter->ena_dev->io_sq_queues[ena_qid];
next_to_clean = rx_ring->next_to_clean;

-#ifdef DEV_NETMAP
- if (netmap_rx_irq(adapter->ifp, rx_ring->qid, &done) != NM_IRQ_PASS)
- return (0);
-#endif /* DEV_NETMAP */
-
- ena_log_io(pdev, DBG, "rx: qid %d\n", qid);
+ ena_log_io(adapter->pdev, DBG, "rx: qid %d", qid);

do {
ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
ena_rx_ctx.max_bufs = adapter->max_rx_sgl_size;
ena_rx_ctx.descs = 0;
ena_rx_ctx.pkt_offset = 0;

- bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
- io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_POSTREAD);
rc = ena_com_rx_pkt(io_cq, io_sq, &ena_rx_ctx);
if (unlikely(rc != 0)) {
if (rc == ENA_COM_NO_SPACE) {
@@ -621,16 +512,14 @@ ena_rx_cleanup(struct ena_ring *rx_ring)
if (unlikely(ena_rx_ctx.descs == 0))
break;

- ena_log_io(pdev, DBG,
- "rx: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
+ ena_log_io(adapter->pdev, DBG,
+ "rx: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x",
rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
ena_rx_ctx.l4_proto, ena_rx_ctx.hash);

/* Receive mbuf from the ring */
mbuf = ena_rx_mbuf(rx_ring, rx_ring->ena_bufs, &ena_rx_ctx,
&next_to_clean);
- bus_dmamap_sync(io_cq->cdesc_addr.mem_handle.tag,
- io_cq->cdesc_addr.mem_handle.map, BUS_DMASYNC_PREREAD);
/* Exit if we failed to retrieve a buffer */
if (unlikely(mbuf == NULL)) {
for (i = 0; i < ena_rx_ctx.descs; ++i) {
@@ -642,24 +531,24 @@ ena_rx_cleanup(struct ena_ring *rx_ring)
break;
}

- if (((if_getcapenable(ifp) & IFCAP_RXCSUM) != 0) ||
- ((if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6) != 0)) {
+ if (((ifp->if_capenable & IFCAP_RXCSUM) != 0) ||
+ ((ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0)) {
ena_rx_checksum(rx_ring, &ena_rx_ctx, mbuf);
}

counter_enter();
counter_u64_add_protected(rx_ring->rx_stats.bytes,
- mbuf->m_pkthdr.len);
+ mbuf->M_dat.MH.MH_pkthdr.len);
counter_u64_add_protected(adapter->hw_stats.rx_bytes,
- mbuf->m_pkthdr.len);
+ mbuf->M_dat.MH.MH_pkthdr.len);
counter_exit();
/*
* LRO is only for IP/TCP packets and TCP checksum of the packet
* should be computed by hardware.
*/
do_if_input = 1;
- if (((if_getcapenable(ifp) & IFCAP_LRO) != 0) &&
- ((mbuf->m_pkthdr.csum_flags & CSUM_IP_VALID) != 0) &&
+ if (((ifp->if_capenable & IFCAP_LRO) != 0) &&
+ ((mbuf->M_dat.MH.MH_pkthdr.csum_flags & CSUM_IP_VALID) != 0) &&
(ena_rx_ctx.l4_proto == ENA_ETH_IO_L4_PROTO_TCP)) {
/*
* Send to the stack if:
@@ -672,9 +561,12 @@ ena_rx_cleanup(struct ena_ring *rx_ring)
do_if_input = 0;
}
if (do_if_input != 0) {
- ena_log_io(pdev, DBG,
- "calling if_input() with mbuf %p\n", mbuf);
- if_input(ifp, mbuf);
+ ena_log_io(adapter->pdev, DBG,
+ "calling if_input() with mbuf %p", mbuf);
+ bool fast_path = ifp->if_classifier.post_packet(mbuf);
+ if (!fast_path) {
+ (*ifp->if_input)(ifp, mbuf);
+ }
}

counter_enter();
@@ -695,7 +587,9 @@ ena_rx_cleanup(struct ena_ring *rx_ring)
ena_refill_rx_bufs(rx_ring, refill_required);
}

- tcp_lro_flush_all(&rx_ring->lro);
+ //TODO: Can this wait? Investigate https://github.com/freebsd/freebsd-src/commit/e936121d3140af047a498559493b9375a6ba6ba3
+ //to port it back
+ //tcp_lro_flush_all(&rx_ring->lro);

return (ENA_RX_BUDGET - budget);
}
@@ -719,18 +613,18 @@ ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf,

offload = false;
ena_meta = &ena_tx_ctx->ena_meta;
- mss = mbuf->m_pkthdr.tso_segsz;
+ mss = mbuf->M_dat.MH.MH_pkthdr.tso_segsz;

if (mss != 0)
offload = true;

- if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0)
+ if ((mbuf->M_dat.MH.MH_pkthdr.csum_flags & CSUM_TSO) != 0)
offload = true;

- if ((mbuf->m_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
+ if ((mbuf->M_dat.MH.MH_pkthdr.csum_flags & CSUM_OFFLOAD) != 0)
offload = true;

- if ((mbuf->m_pkthdr.csum_flags & CSUM6_OFFLOAD) != 0)
+ if ((mbuf->M_dat.MH.MH_pkthdr.csum_flags & CSUM6_OFFLOAD) != 0)
offload = true;

if (!offload) {
@@ -764,12 +658,14 @@ ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf,
if ((ip->ip_off & htons(IP_DF)) != 0)
ena_tx_ctx->df = 1;
break;
+#ifdef INET6
case ETHERTYPE_IPV6:
ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
iphlen = ip6_lasthdr(mbuf, ehdrlen, IPPROTO_IPV6, &ipproto);
iphlen -= ehdrlen;
ena_tx_ctx->df = 1;
break;
+#endif
default:
iphlen = 0;
ipproto = 0;
@@ -779,25 +675,25 @@ ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct mbuf *mbuf,
mbuf_next = m_getptr(mbuf, iphlen + ehdrlen, &offset);
th = (struct tcphdr *)(mtodo(mbuf_next, offset));

- if ((mbuf->m_pkthdr.csum_flags & CSUM_IP) != 0) {
+ if ((mbuf->M_dat.MH.MH_pkthdr.csum_flags & CSUM_IP) != 0) {
ena_tx_ctx->l3_csum_enable = 1;
}
- if ((mbuf->m_pkthdr.csum_flags & CSUM_TSO) != 0) {
+ if ((mbuf->M_dat.MH.MH_pkthdr.csum_flags & CSUM_TSO) != 0) {
ena_tx_ctx->tso_enable = 1;
ena_meta->l4_hdr_len = (th->th_off);
}

if (ipproto == IPPROTO_TCP) {
ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
- if ((mbuf->m_pkthdr.csum_flags &
- (CSUM_IP_TCP | CSUM_IP6_TCP)) != 0)
+ if ((mbuf->M_dat.MH.MH_pkthdr.csum_flags &
+ (CSUM_TCP | CSUM_TCP_IPV6)) != 0)
ena_tx_ctx->l4_csum_enable = 1;
else
ena_tx_ctx->l4_csum_enable = 0;
} else if (ipproto == IPPROTO_UDP) {
ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
- if ((mbuf->m_pkthdr.csum_flags &
- (CSUM_IP_UDP | CSUM_IP6_UDP)) != 0)
+ if ((mbuf->M_dat.MH.MH_pkthdr.csum_flags &
+ (CSUM_UDP | CSUM_UDP_IPV6)) != 0)
ena_tx_ctx->l4_csum_enable = 1;
else
ena_tx_ctx->l4_csum_enable = 0;
@@ -827,7 +723,7 @@ ena_check_and_collapse_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
return (0);

if ((num_frags == adapter->max_tx_sgl_size) &&
- ((*mbuf)->m_pkthdr.len < tx_ring->tx_max_header_size))
+ ((*mbuf)->M_dat.MH.MH_pkthdr.len < tx_ring->tx_max_header_size))
return (0);

counter_u64_add(tx_ring->tx_stats.collapse, 1);
@@ -849,122 +745,40 @@ static int
ena_tx_map_mbuf(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info,
struct mbuf *mbuf, void **push_hdr, u16 *header_len)
{
- struct ena_adapter *adapter = tx_ring->adapter;
- struct ena_com_buf *ena_buf;
- bus_dma_segment_t segs[ENA_BUS_DMA_SEGS];
- size_t iseg = 0;
- uint32_t mbuf_head_len;
- uint16_t offset;
- int rc, nsegs;
-
- mbuf_head_len = mbuf->m_len;
- tx_info->mbuf = mbuf;
- ena_buf = tx_info->bufs;
+ int nsegs = 0;

- /*
- * For easier maintaining of the DMA map, map the whole mbuf even if
- * the LLQ is used. The descriptors will be filled using the segments.
- */
- rc = bus_dmamap_load_mbuf_sg(adapter->tx_buf_tag,
- tx_info->dmamap, mbuf, segs, &nsegs, BUS_DMA_NOWAIT);
- if (unlikely((rc != 0) || (nsegs == 0))) {
- ena_log_io(adapter->pdev, WARN,
- "dmamap load failed! err: %d nsegs: %d\n", rc, nsegs);
- goto dma_error;
- }
-
- if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
- /*
- * When the device is LLQ mode, the driver will copy
- * the header into the device memory space.
- * the ena_com layer assumes the header is in a linear
- * memory space.
- * This assumption might be wrong since part of the header
- * can be in the fragmented buffers.
- * First check if header fits in the mbuf. If not, copy it to
- * separate buffer that will be holding linearized data.
- */
- *header_len = min_t(uint32_t, mbuf->m_pkthdr.len,
- tx_ring->tx_max_header_size);
-
- /* If header is in linear space, just point into mbuf's data. */
- if (likely(*header_len <= mbuf_head_len)) {
- *push_hdr = mbuf->m_data;
- /*
- * Otherwise, copy whole portion of header from multiple
- * mbufs to intermediate buffer.
- */
- } else {
- m_copydata(mbuf, 0, *header_len,
- tx_ring->push_buf_intermediate_buf);
- *push_hdr = tx_ring->push_buf_intermediate_buf;
+ tx_info->mbuf = mbuf;

- counter_u64_add(tx_ring->tx_stats.llq_buffer_copy, 1);
- }
+ for (struct mbuf *m = mbuf; m != NULL; m = m->m_hdr.mh_next) {
+ int frag_len = m->m_hdr.mh_len;

- ena_log_io(adapter->pdev, DBG,
- "mbuf: %p header_buf->vaddr: %p push_len: %d\n",
- mbuf, *push_hdr, *header_len);
-
- /* If packet is fitted in LLQ header, no need for DMA segments. */
- if (mbuf->m_pkthdr.len <= tx_ring->tx_max_header_size) {
- return (0);
- } else {
- offset = tx_ring->tx_max_header_size;
- /*
- * As Header part is mapped to LLQ header, we can skip
- * it and just map the residuum of the mbuf to DMA
- * Segments.
- */
- while (offset > 0) {
- if (offset >= segs[iseg].ds_len) {
- offset -= segs[iseg].ds_len;
- } else {
- ena_buf->paddr = segs[iseg].ds_addr +
- offset;
- ena_buf->len = segs[iseg].ds_len -
- offset;
- ena_buf++;
- tx_info->num_of_bufs++;
- offset = 0;
- }
- iseg++;
- }
+ if (frag_len != 0) {
+ struct ena_com_buf *ena_buf = &tx_info->bufs[nsegs];
+ ena_buf->paddr = mmu::virt_to_phys(m->m_hdr.mh_data);
+ ena_buf->len = frag_len;
+ tx_info->num_of_bufs++;
+ if (++nsegs >= ENA_PKT_MAX_BUFS) //TODO: Do we lose data if this is true?
+ break;
}
- } else {
- *push_hdr = NULL;
- /*
- * header_len is just a hint for the device. Because FreeBSD is
- * not giving us information about packet header length and it
- * is not guaranteed that all packet headers will be in the 1st
- * mbuf, setting header_len to 0 is making the device ignore
- * this value and resolve header on it's own.
- */
- *header_len = 0;
}

- /* Map rest of the mbuf */
- while (iseg < nsegs) {
- ena_buf->paddr = segs[iseg].ds_addr;
- ena_buf->len = segs[iseg].ds_len;
- ena_buf++;
- iseg++;
- tx_info->num_of_bufs++;
- }
+ *push_hdr = NULL;
+ /*
+ * header_len is just a hint for the device. Because FreeBSD is
+ * not giving us information about packet header length and it
+ * is not guaranteed that all packet headers will be in the 1st
+ * mbuf, setting header_len to 0 is making the device ignore
+ * this value and resolve header on it's own.
+ */
+ *header_len = 0;

return (0);
-
-dma_error:
- counter_u64_add(tx_ring->tx_stats.dma_mapping_err, 1);
- tx_info->mbuf = NULL;
- return (rc);
}

static int
ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
{
struct ena_adapter *adapter;
- device_t pdev;
struct ena_tx_buffer *tx_info;
struct ena_com_tx_ctx ena_tx_ctx;
struct ena_com_dev *ena_dev;
@@ -979,30 +793,29 @@ ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)

ena_qid = ENA_IO_TXQ_IDX(tx_ring->que->id);
adapter = tx_ring->que->adapter;
- pdev = adapter->pdev;
ena_dev = adapter->ena_dev;
io_sq = &ena_dev->io_sq_queues[ena_qid];

rc = ena_check_and_collapse_mbuf(tx_ring, mbuf);
if (unlikely(rc != 0)) {
- ena_log_io(pdev, WARN, "Failed to collapse mbuf! err: %d\n",
+ ena_log_io(adapter->pdev, WARN, "Failed to collapse mbuf! err: %d",
rc);
return (rc);
}

- ena_log_io(pdev, DBG, "Tx: %d bytes\n", (*mbuf)->m_pkthdr.len);
+ ena_log_io(adapter->pdev, DBG, "Tx: %d bytes", (*mbuf)->M_dat.MH.MH_pkthdr.len);

next_to_use = tx_ring->next_to_use;
req_id = tx_ring->free_tx_ids[next_to_use];
tx_info = &tx_ring->tx_buffer_info[req_id];
tx_info->num_of_bufs = 0;

ENA_WARN(tx_info->mbuf != NULL, adapter->ena_dev,
- "mbuf isn't NULL for req_id %d\n", req_id);
+ "mbuf isn't NULL for req_id %d", req_id);

rc = ena_tx_map_mbuf(tx_ring, tx_info, *mbuf, &push_hdr, &header_len);
if (unlikely(rc != 0)) {
- ena_log_io(pdev, WARN, "Failed to map TX mbuf\n");
+ ena_log_io(adapter->pdev, WARN, "Failed to map TX mbuf");
return (rc);
}
memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
@@ -1017,8 +830,8 @@ ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)

if (tx_ring->acum_pkts == ENA_DB_THRESHOLD ||
ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx)) {
- ena_log_io(pdev, DBG,
- "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
+ ena_log_io(adapter->pdev, DBG,
+ "llq tx max burst size of queue %d achieved, writing doorbell to send burst",
tx_ring->que->id);
ena_ring_tx_doorbell(tx_ring);
}
@@ -1027,10 +840,10 @@ ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
rc = ena_com_prepare_tx(io_sq, &ena_tx_ctx, &nb_hw_desc);
if (unlikely(rc != 0)) {
if (likely(rc == ENA_COM_NO_MEM)) {
- ena_log_io(pdev, DBG, "tx ring[%d] is out of space\n",
+ ena_log_io(adapter->pdev, DBG, "tx ring[%d] is out of space",
tx_ring->que->id);
} else {
- ena_log(pdev, ERR, "failed to prepare tx bufs\n");
+ ena_log(adapter->pdev, ERR, "failed to prepare tx bufs");
ena_trigger_reset(adapter,
ENA_REGS_RESET_DRIVER_INVALID_STATE);
}
@@ -1041,15 +854,15 @@ ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
counter_enter();
counter_u64_add_protected(tx_ring->tx_stats.cnt, 1);
counter_u64_add_protected(tx_ring->tx_stats.bytes,
- (*mbuf)->m_pkthdr.len);
+ (*mbuf)->M_dat.MH.MH_pkthdr.len);

counter_u64_add_protected(adapter->hw_stats.tx_packets, 1);
counter_u64_add_protected(adapter->hw_stats.tx_bytes,
- (*mbuf)->m_pkthdr.len);
+ (*mbuf)->M_dat.MH.MH_pkthdr.len);
counter_exit();

tx_info->tx_descs = nb_hw_desc;
- getbinuptime(&tx_info->timestamp);
+ tx_info->timestamp = osv::clock::uptime::now().time_since_epoch().count();
tx_info->print_once = true;

tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
@@ -1061,7 +874,7 @@ ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
*/
if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
adapter->max_tx_sgl_size + 2))) {
- ena_log_io(pdev, DBG, "Stop queue %d\n", tx_ring->que->id);
+ ena_log_io(adapter->pdev, DBG, "Stop queue %d", tx_ring->que->id);

tx_ring->running = false;
counter_u64_add(tx_ring->tx_stats.queue_stop, 1);
@@ -1082,14 +895,10 @@ ena_xmit_mbuf(struct ena_ring *tx_ring, struct mbuf **mbuf)
}
}

- bus_dmamap_sync(adapter->tx_buf_tag, tx_info->dmamap,
- BUS_DMASYNC_PREWRITE);
-
return (0);

dma_error:
tx_info->mbuf = NULL;
- bus_dmamap_unload(adapter->tx_buf_tag, tx_info->dmamap);

return (rc);
}
@@ -1100,54 +909,59 @@ ena_start_xmit(struct ena_ring *tx_ring)
struct mbuf *mbuf;
struct ena_adapter *adapter = tx_ring->adapter;
int ret = 0;
+ int mnum = 0;

ENA_RING_MTX_ASSERT(tx_ring);

- if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
+ if (unlikely((adapter->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))
return;

if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_LINK_UP, adapter)))
return;

- while ((mbuf = drbr_peek(adapter->ifp, tx_ring->br)) != NULL) {
+ while ((mbuf = static_cast<struct mbuf *>(buf_ring_peek_clear_sc(tx_ring->br))) != NULL) {
ena_log_io(adapter->pdev, DBG,
- "\ndequeued mbuf %p with flags %#x and header csum flags %#jx\n",
- mbuf, mbuf->m_flags, (uint64_t)mbuf->m_pkthdr.csum_flags);
+ "dequeued mbuf %p with flags %#x and header csum flags %#jx",
+ mbuf, mbuf->m_hdr.mh_flags, (uint64_t)mbuf->M_dat.MH.MH_pkthdr.csum_flags);

if (unlikely(!tx_ring->running)) {
- drbr_putback(adapter->ifp, tx_ring->br, mbuf);
+ buf_ring_putback_sc(tx_ring->br, mbuf);
break;
}

if (unlikely((ret = ena_xmit_mbuf(tx_ring, &mbuf)) != 0)) {
if (ret == ENA_COM_NO_MEM) {
- drbr_putback(adapter->ifp, tx_ring->br, mbuf);
+ buf_ring_putback_sc(tx_ring->br, mbuf);
} else if (ret == ENA_COM_NO_SPACE) {
- drbr_putback(adapter->ifp, tx_ring->br, mbuf);
+ buf_ring_putback_sc(tx_ring->br, mbuf);
} else {
m_freem(mbuf);
- drbr_advance(adapter->ifp, tx_ring->br);
+ buf_ring_advance_sc(tx_ring->br);
}

break;
}

- drbr_advance(adapter->ifp, tx_ring->br);
+ buf_ring_advance_sc(tx_ring->br);

- if (unlikely((if_getdrvflags(adapter->ifp) & IFF_DRV_RUNNING) == 0))
+ if (unlikely((adapter->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0))
return;

tx_ring->acum_pkts++;

BPF_MTAP(adapter->ifp, mbuf);
+ mnum++;
}

+ ena_log_io(adapter->pdev, INFO,
+ "dequeued %d mbufs", mnum);
+
if (likely(tx_ring->acum_pkts != 0)) {
/* Trigger the dma engine */
ena_ring_tx_doorbell(tx_ring);
+ ena_log(adapter->pdev, DBG, "Rang TX doorbell, tx_ring->running:%d", tx_ring->running);
}

if (unlikely(!tx_ring->running))
- taskqueue_enqueue(tx_ring->que->cleanup_tq,
- &tx_ring->que->cleanup_task);
+ tx_ring->que->cleanup_thread->wake_with([tx_ring] { tx_ring->que->cleanup_pending++; });
}
diff --git a/bsd/sys/dev/ena/ena_datapath.h b/bsd/sys/dev/ena/ena_datapath.h
--- a/bsd/sys/dev/ena/ena_datapath.h
+++ b/bsd/sys/dev/ena/ena_datapath.h
@@ -34,12 +34,14 @@
#ifndef ENA_TXRX_H
#define ENA_TXRX_H

-void ena_cleanup(void *arg, int pending);
+typedef struct ifnet* if_t;
+
+void ena_cleanup(struct ena_que *que);
void ena_qflush(if_t ifp);
int ena_mq_start(if_t ifp, struct mbuf *m);
-void ena_deferred_mq_start(void *arg, int pending);
+void ena_deferred_mq_start(ena_ring *ring);

#define CSUM_OFFLOAD (CSUM_IP | CSUM_TCP | CSUM_UDP)
-#define CSUM6_OFFLOAD (CSUM_IP6_UDP | CSUM_IP6_TCP)
+#define CSUM6_OFFLOAD (CSUM_UDP_IPV6 | CSUM_TCP_IPV6)

#endif /* ENA_TXRX_H */