if (unlikely(!(i & (E1000_RX_BUFFER_WRITE - 1)))) { /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, * such as IA-64). */ wmb(); if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) e1000e_update_rdt_wa(rx_ring, i); else writel(i, rx_ring->tail); } //移动到下一个 ring entry i++; if (i == rx_ring->count) i = 0; buffer_info = &rx_ring->buffer_info[i]; }
/** * e1000_clean_rx_irq - Send received data up the network stack * @rx_ring: Rx descriptor ring * @work_done: output parameter for indicating completed work * @work_to_do: how many packets we can clean * * the return value indicates whether actual cleaning was done, there * is no guarantee that everything was cleaned **/ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done, int work_to_do) { //初始化指针与状态,rx_desc 是网卡写入的描述符,包含状态位和数据长度 struct e1000_adapter *adapter = rx_ring->adapter; struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; struct e1000_hw *hw = &adapter->hw; union e1000_rx_desc_extended *rx_desc, *next_rxd; struct e1000_buffer *buffer_info, *next_buffer; u32 length, staterr; unsigned int i; //清理了多少个的同时也意味着要重新构造多少个 int cleaned_count = 0; bool cleaned = false; unsigned int total_rx_bytes = 0, total_rx_packets = 0;
/* !EOP means multiple descriptors were used to store a single * packet, if that's the case we need to toss it. In fact, we * need to toss every packet with the EOP bit clear and the * next frame that _does_ have the EOP bit set, as it is by * definition only a frame fragment */ if (unlikely(!(staterr & E1000_RXD_STAT_EOP))) adapter->flags2 |= FLAG2_IS_DISCARDING;
if (adapter->flags2 & FLAG2_IS_DISCARDING) { /* All receives must fit into a single buffer */ e_dbg("Receive packet consumed multiple buffers\n"); /* recycle */ buffer_info->skb = skb; if (staterr & E1000_RXD_STAT_EOP) adapter->flags2 &= ~FLAG2_IS_DISCARDING; goto next_desc; }
/* adjust length to remove Ethernet CRC */ if (!(adapter->flags2 & FLAG2_CRC_STRIPPING)) { /* If configured to store CRC, don't subtract FCS, * but keep the FCS bytes out of the total_rx_bytes * counter */ if (netdev->features & NETIF_F_RXFCS) total_rx_bytes -= 4; else length -= 4; }
total_rx_bytes += length; total_rx_packets++;
/* code added for copybreak, this should improve * performance for small packets with large amounts * of reassembly being done in the stack */ if (length < copybreak) { //长度过小 struct sk_buff *new_skb = //如果数据包小于256字节,分配小包专用的skb napi_alloc_skb(&adapter->napi, length); if (new_skb) { //拷贝数据(含 NET_IP_ALIGN 前缀对齐) skb_copy_to_linear_data_offset(new_skb, -NET_IP_ALIGN, (skb->data - NET_IP_ALIGN), (length + NET_IP_ALIGN)); /* save the skb in buffer_info as good */看看,这里又把大的skb重新放回去了 buffer_info->skb = skb; //使用小包 skb = new_skb; } /* else just continue with the old one */ } /* end copybreak code */ skb_put(skb, length);
/* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= E1000_RX_BUFFER_WRITE) { adapter->alloc_rx_buf(rx_ring, cleaned_count, GFP_ATOMIC); cleaned_count = 0; }
static void e1000_alloc_rx_buffers(struct e1000_ring *rx_ring, int cleaned_count, gfp_t gfp) { 省略一堆 //这里的复用就包含了这个大的skb(相对于小包来说太大了) while (cleaned_count--) { skb = buffer_info->skb; if (skb) { skb_trim(skb, 0); goto map_skb; }
总结
现在对内容进行总结:
1 2 3 4
graph LR
    Aa(驱动初始化时调用alloc_rx_buffers方法分配ringbuf)-->Ab(完成数据包分配及DMA映射)
    Ba(网卡传输数据)-->Bb(直接DMA找到ringbuf映射的地址写入数据)
    Ca(ksoftirqd进程工作)-->Cc(调用e1000_clean_rx_irq)-->Cb(把数据包丢给上层并复用skb)