有没有朋友测试过82576 igb的驱动性能,这样做是否可以提高收包速度呢?
_______________________________________________
Linux 内核开发中文邮件列表
Linux-...@zh-kernel.org
http://zh-kernel.org/mailman/listinfo/linux-kernel
Linux 内核开发中文社区: http://zh-kernel.org
2010/7/26 bekars <bek...@gmail.com>
中断收包之后调用这个函数:
/*
 * igb_lro_queue - try to coalesce a received TCP segment into an LRO descriptor
 * @q_vector: queue vector whose ->lrolist holds the active and free descriptors
 * @new_skb: freshly received skb; ->data is read as the start of the IP header
 * @tag: VLAN tag of the packet, used as part of the flow match
 *
 * The function walks the active descriptor list looking for a flow that
 * matches the packet's ports, addresses and VLAN tag.  On a match the segment
 * is either appended to that descriptor or the descriptor is flushed.  If
 * nothing was appended, a new descriptor is started from the free list when
 * possible; otherwise the skb is handed to igb_receive_skb().
 *
 * Returns @new_skb when its page fragments were copied into the aggregate skb
 * (the emptied skb is handed back and stats.recycled is bumped, presumably so
 * the caller can reuse it - confirm against the caller).  Returns NULL in
 * every other case: the skb was chained, stored in a new descriptor, or passed
 * to igb_receive_skb().
 */
static struct sk_buff *igb_lro_queue(struct igb_q_vector *q_vector,
struct sk_buff *new_skb,
u16 tag)
{
struct sk_buff *lro_skb;
struct igb_lro_desc *lrod;
struct hlist_node *node;
struct skb_shared_info *new_skb_info = skb_shinfo(new_skb);
struct igb_lro_list *lrolist = q_vector->lrolist;
/*
 * The TCP header is taken to follow the IP header directly (iph + 1), i.e.
 * no IP options are accounted for here.
 * NOTE(review): presumably igb_lro_header_ok() rejects anything else - its
 * body is not visible here, confirm.
 */
struct iphdr *iph = (struct iphdr *)new_skb->data;
struct tcphdr *th = (struct tcphdr *)(iph + 1);
/* <= 0 is treated below as "not eligible for LRO" */
int tcp_data_len = igb_lro_header_ok(new_skb, iph, th);
/* bytes of TCP options: header length from doff minus the fixed header */
u16 opt_bytes = (th->doff << 2) - sizeof(*th);
/* first option word, or NULL when the segment carries no options */
u32 *ts_ptr = (opt_bytes ? (u32 *)(th + 1) : NULL);
u32 seq = ntohl(th->seq);
/*
 * We have a packet that might be eligible for LRO, so see if it matches
 * any flow we are already aggregating.
 */
hlist_for_each_entry(lrod, node, &lrolist->active, lro_node) {
/* skip descriptors that belong to a different flow */
if (lrod->source_port != th->source ||
lrod->dest_port != th->dest ||
lrod->source_ip != iph->saddr ||
lrod->dest_ip != iph->daddr ||
lrod->vlan_tag != tag)
continue;
/*
 * Malformed header, no TCP data, resultant packet would be too large,
 * ack sequence numbers do not match, window size has changed, or the
 * new skb is larger than our current mss.
 * If any of the above holds, flush the LRO descriptor and start over if
 * possible.  tcp_data_len is left untouched on this path, so a valid
 * segment can still start a new descriptor after the loop.
 */
if (tcp_data_len <= 0 || (tcp_data_len + lrod->len) > 65521
||
lrod->ack_seq != th->ack_seq ||
lrod->window != th->window ||
lrod->mss < tcp_data_len) {
igb_lro_flush(q_vector, lrod);
break;
}
/*
 * Out of order packet: flush, and force tcp_data_len negative so the
 * "start a new packet" test after the loop fails and the skb goes
 * straight to igb_receive_skb().
 */
if (seq != lrod->next_seq) {
igb_lro_flush(q_vector, lrod);
tcp_data_len = -1;
break;
}
/*
 * NOTE(review): ts_ptr is NULL when this segment has no options
 * (opt_bytes == 0).  If the descriptor was started with options
 * (lrod->opt_bytes != 0), the read of ts_ptr + 1 below happens before
 * the opt_bytes != lrod->opt_bytes check and dereferences a NULL-based
 * pointer.  Confirm whether igb_lro_header_ok() makes this impossible.
 */
if (lrod->opt_bytes || opt_bytes) {
/*
 * Option words 1 and 2 are used as tsval and tsecr.
 * NOTE(review): the option layout is not validated in this
 * function - confirm it is checked elsewhere.
 */
u32 tsval = ntohl(*(ts_ptr + 1));
/*
 * Flush when the option length differs from the descriptor's, the
 * timestamp value went backwards, or the tsecr word is zero.
 */
if (opt_bytes != lrod->opt_bytes ||
lrod->tsval > tsval || *(ts_ptr + 2) == 0) {
igb_lro_flush(q_vector, lrod);
tcp_data_len = -1;
break;
}
lrod->tsval = tsval;
lrod->tsecr = *(ts_ptr + 2);
}
/* remove any padding from the end of the skb */
__pskb_trim(new_skb, ntohs(iph->tot_len));
/* strip the IP and TCP headers so that only the TCP payload remains */
skb_pull(new_skb, ntohs(iph->tot_len) - tcp_data_len);
/* account for the appended payload in the descriptor */
lrod->next_seq += tcp_data_len;
lrod->len += tcp_data_len;
lrod->psh |= th->psh;
lrod->append_cnt++;
lrolist->stats.coal++;
lro_skb = lrod->skb;
#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
/*
 * If the linear area is empty and the combined fragment count still
 * fits in MAX_SKB_FRAGS, move the page fragments into the aggregate skb.
 */
if (!skb_headlen(new_skb) &&
((skb_shinfo(lro_skb)->nr_frags +
skb_shinfo(new_skb)->nr_frags) <= MAX_SKB_FRAGS )) {
struct skb_shared_info *lro_skb_info =
skb_shinfo(lro_skb);
/* append the new skb's frag descriptors after the existing ones */
memcpy(lro_skb_info->frags + lro_skb_info->nr_frags,
new_skb_info->frags,
new_skb_info->nr_frags * sizeof(skb_frag_t));
lro_skb_info->nr_frags += new_skb_info->nr_frags;
lro_skb->len += tcp_data_len;
lro_skb->data_len += tcp_data_len;
lro_skb->truesize += tcp_data_len;
/*
 * Reset the donor skb to an empty state: no frags, zero length, data
 * pointer back at the MAC header.  It is returned to the caller below.
 */
new_skb_info->nr_frags = 0;
new_skb->truesize -= tcp_data_len;
new_skb->len = new_skb->data_len = 0;
new_skb->data = skb_mac_header(new_skb);
skb_reset_tail_pointer(new_skb);
new_skb->protocol = 0;
lrolist->stats.recycled++;
} else {
#endif
/*
 * Chain this new skb behind the descriptor's current skb via the
 * next/prev pointers; lrod->skb now refers to the newest skb.  Clearing
 * new_skb makes the function return NULL for this path.
 */
new_skb->prev = lro_skb;
lro_skb->next = new_skb;
lrod->skb = new_skb ;
new_skb = NULL;
#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT
}
#endif
/* PSH seen or a segment shorter than the mss: flush the descriptor now */
if (lrod->psh || (tcp_data_len < lrod->mss))
igb_lro_flush(q_vector, lrod);
return new_skb;
}
/*
 * Start a new packet: requires TCP payload, a free descriptor, and no PSH
 * flag on this segment.
 */
if (tcp_data_len > 0 && !hlist_empty(&lrolist->free) && !th->psh) {
lrod = hlist_entry(lrolist->free.first, struct igb_lro_desc,
lro_node);
/* remove any padding from the end of the skb */
__pskb_trim(new_skb, ntohs(iph->tot_len));
/* record the flow key and TCP state later segments must match */
lrod->skb = new_skb;
lrod->source_ip = iph->saddr;
lrod->dest_ip = iph->daddr;
lrod->source_port = th->source;
lrod->dest_port = th->dest;
lrod->vlan_tag = tag;
/* headers were not pulled, so this is the full skb length */
lrod->len = new_skb->len;
lrod->next_seq = seq + tcp_data_len;
lrod->ack_seq = th->ack_seq;
lrod->window = th->window;
/* the first segment's payload size serves as the mss for this flow */
lrod->mss = tcp_data_len;
lrod->opt_bytes = opt_bytes;
lrod->psh = 0;
lrod->append_cnt = 0;
/* record timestamp if it is present */
if (opt_bytes) {
lrod->tsval = ntohl(*(ts_ptr + 1));
lrod->tsecr = *(ts_ptr + 2);
}
/* remove first packet from freelist.. */
hlist_del(&lrod->lro_node);
/* .. and insert at the front of the active list */
hlist_add_head(&lrod->lro_node, &lrolist->active);
lrolist->active_cnt++;
lrolist->stats.coal++;
return NULL;
}
/* packet not handled by any of the above, pass it to the stack */
igb_receive_skb(q_vector, new_skb, tag);
return NULL;
以前是协议栈做的。很多网卡都支持LRO。
这是一种hardware offload技术,用于降低CPU占用率,从而在高负载情况下让网卡性能具有更好的可扩展性。
类似的技术有hardware checksum, TSO, TOE等等, 都是hardware offload技术。
--
Cheers,
Oliver Yang
Twitter: http://twitter.com/yangoliver
Blog: http://blog.csdn.net/yayong
--------------------------------------------------------------------
An OpenSolaris Developer
> 在 2010年7月26日 下午1:46,蓝天宇 <lantia...@gmail.com> 写道:
> > .... 这部分放在驱动里做?个人感觉应该在协议栈做更合理。没有看过相应代码,发出来看看。
>
> 以前是协议栈做的。很多网卡都支持LRO。
>
> 这是一种hardware offload技术,用于降低CPU的占用率,从而可以在高负载情况下,网卡的性能的可扩展性好一些。
>
> 类似的技术有hardware checksum, TSO, TOE等等, 都是hardware offload技术。
>
>
> --
> Cheers,
>
> Oliver Yang
>
> Twitter: http://twitter.com/yangoliver
> Blog: http://blog.csdn.net/yayong
> --------------------------------------------------------------------
> An OpenSolaris Developer
>
但是实际上这块是在软中断里面做的吧。
这个和在协议栈里面做只是减少了查路由的操作吧,
但是还加入了一个hash链表的查找,这样能提高多少性能呢?
还是说82576的硬件支持一些高级的功能,比如分光什么的,通过在驱动中这样做可以更好的发挥硬件性能?