技术交流

好好学习,天天向上。

0%

e1000e网卡驱动源码分析

[toc]

调用栈分析

栈——从insmod到e1000_probe

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
(gdb) bt
#0 e1000_probe (pdev=0xffff888106faa000, ent=0xffffffffa0a710a0) at drivers/net/ethernet/intel/e1000e/netdev.c:7043
#1 0xffffffff814515a1 in local_pci_probe (_ddi=0xffffc900015abba8) at drivers/pci/pci-driver.c:335
#2 0xffffffff8145225b in pci_call_probe (id=<optimized out>, dev=<optimized out>, drv=<optimized out>) at drivers/pci/pci-driver.c:389
#3 __pci_device_probe (pci_dev=<optimized out>, drv=<optimized out>) at drivers/pci/pci-driver.c:414
#4 pci_device_probe (dev=0xffff888106faa0b0) at drivers/pci/pci-driver.c:454
#5 0xffffffff815650ed in really_probe (drv=<optimized out>, dev=<optimized out>) at drivers/base/dd.c:440
#6 driver_probe_device (drv=0xffff888106faa000, dev=0xffffffffa0a710a0) at drivers/base/dd.c:582
#7 0xffffffff81565500 in __driver_attach (dev=0xffff888106faa0b0, data=0xffffffffa0a81f90 <e1000_driver+144>) at drivers/base/dd.c:816
#8 0xffffffff81562d77 in bus_for_each_dev (bus=<optimized out>, start=<optimized out>, data=0x0 <irq_stack_union>, fn=0x0 <irq_stack_union>) at drivers/base/bus.c:311
#9 0xffffffff81564aea in driver_attach (drv=<optimized out>) at drivers/base/dd.c:835
#10 0xffffffff81564465 in bus_add_driver (drv=0xffffffffa0a81f90 <e1000_driver+144>) at drivers/base/bus.c:667
#11 0xffffffff815660bb in driver_register (drv=0xffffffffa0a81f90 <e1000_driver+144>) at drivers/base/driver.c:170
#12 0xffffffff81002766 in do_one_initcall (fn=0xffffffffa0544000 <e1000_init_module>) at init/main.c:892
#13 0xffffffff8114748a in do_init_module (mod=0xffffffffa0a857c0) at kernel/module.c:3465
#14 0xffffffff81149830 in load_module (info=0xffffc900015abea0, uargs=<optimized out>, flags=<optimized out>) at kernel/module.c:3793
#15 0xffffffff81149e28 in __do_sys_finit_module (fd=3, uargs=0x558feb1857b6 "", flags=0) at kernel/module.c:3887
#16 0xffffffff8100417b in do_syscall_64 (nr=<optimized out>, regs=0xffffffffa0a710a0) at arch/x86/entry/common.c:293
#17 0xffffffff81a000ad in entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:177
#18 0x0000000000000000 in ?? ()
(gdb) p *ent
$2 = {
vendor = 0x8086,
device = 0x10D3,
subvendor = 0xFFFFFFFF,
subdevice = 0xFFFFFFFF,
class = 0,
class_mask = 0,
driver_data = 3
}

栈——从收到报文到发送ack

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#0  e1000_xmit_frame (skb=0xffff88826911fc00, netdev=0xffff88826820c000) at drivers/net/ethernet/intel/e1000e/netdev.c:5769
#1 0xffffffff816d6f15 in __netdev_start_xmit (more=<optimized out>, dev=<optimized out>, skb=<optimized out>, ops=<optimized out>) at ./include/linux/netdevice.h:4534
#2 netdev_start_xmit (more=<optimized out>, txq=<optimized out>, dev=<optimized out>, skb=<optimized out>) at ./include/linux/netdevice.h:4543
-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑进入e1000e的发包流程↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-
#3 xmit_one (more=<optimized out>, txq=<optimized out>, dev=<optimized out>, skb=<optimized out>) at net/core/dev.c:3261
#4 dev_hard_start_xmit (first=0xffff88826911fc00, dev=0xffff88826820c000, txq=0xffff88826d2f3800, ret=<optimized out>) at net/core/dev.c:3277
#5 0xffffffff81721251 in sch_direct_xmit (skb=0xffff88826911fc00, q=0xffff888266488e00, dev=0xffff88826820c000, txq=0xffff88826d2f3800, root_lock=0xffff888266488eac, validate=<optimized out>) at net/sched/sch_generic.c:312
#6 0xffffffff816d7966 in __dev_xmit_skb (txq=<optimized out>, dev=<optimized out>, q=<optimized out>, skb=<optimized out>) at net/core/dev.c:3497
#7 __dev_queue_xmit (skb=0xffff88826911fc00, sb_dev=<optimized out>) at net/core/dev.c:3824
#8 0xffffffff816d7b1c in dev_queue_xmit (skb=<optimized out>) at net/core/dev.c:3889
#9 0xffffffff81748690 in neigh_hh_output (skb=<optimized out>, hh=<optimized out>) at ./include/net/neighbour.h:496
#10 neigh_output (skb=<optimized out>, n=<optimized out>) at ./include/net/neighbour.h:504
#11 ip_finish_output2 (net=<optimized out>, sk=<optimized out>, skb=0xffff88826911fc00) at net/ipv4/ip_output.c:229
#12 0xffffffff81749f9c in NF_HOOK_COND (pf=<optimized out>, hook=<optimized out>, in=<optimized out>, okfn=<optimized out>, cond=<optimized out>, out=<optimized out>, skb=<optimized out>, sk=<optimized out>, net=<optimized out>) at ./include/linux/netfilter.h:278
#13 ip_output (net=0xffff88826911fc00, sk=0xffff88826820c000, skb=0xffff88826911fc00) at net/ipv4/ip_output.c:405
#14 0xffffffff81763d94 in __tcp_transmit_skb (sk=0xffff88826402af80, skb=0xffff88826911fc00, clone_it=<optimized out>, gfp_mask=<optimized out>, rcv_nxt=3686429075) at net/ipv4/tcp_output.c:1172
#15 0xffffffff8176630e in __tcp_send_ack (sk=<optimized out>, rcv_nxt=<optimized out>) at net/ipv4/tcp_output.c:3651
-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑协议栈决定发送ack给对端↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-
#16 0xffffffff81760a18 in tcp_ack_snd_check (sk=<optimized out>) at net/ipv4/tcp_input.c:5235
#17 tcp_rcv_established (sk=0xffff88826402af80, skb=0xffff88826f334800) at net/ipv4/tcp_input.c:5651
#18 0xffffffff8176c37a in tcp_v4_do_rcv (sk=0xffff88826402af80, skb=0xffff88826f334800) at net/ipv4/tcp_ipv4.c:1535
#19 0xffffffff8176ddc5 in tcp_v4_rcv (skb=0xffff88826f334800) at net/ipv4/tcp_ipv4.c:1820
#20 0xffffffff817445b9 in ip_local_deliver_finish (net=0xffffffff82393580 <init_net>, sk=<optimized out>, skb=0xffff88826f334800) at net/ipv4/ip_input.c:218
#21 0xffffffff817448db in NF_HOOK (sk=<optimized out>, pf=<optimized out>, hook=<optimized out>, in=<optimized out>, out=<optimized out>, okfn=<optimized out>, skb=<optimized out>, net=<optimized out>) at ./include/linux/netfilter.h:289
#22 NF_HOOK (pf=<optimized out>, sk=<optimized out>, out=<optimized out>, okfn=<optimized out>, in=<optimized out>, skb=<optimized out>, net=<optimized out>, hook=<optimized out>) at ./include/linux/netfilter.h:283
#23 ip_local_deliver (skb=0xffff88826f334800) at net/ipv4/ip_input.c:260
#24 0xffffffff81744bc3 in NF_HOOK (sk=<optimized out>, pf=<optimized out>, hook=<optimized out>, in=<optimized out>, out=<optimized out>, okfn=<optimized out>, skb=<optimized out>, net=<optimized out>) at ./include/linux/netfilter.h:289
#25 NF_HOOK (pf=<optimized out>, sk=<optimized out>, out=<optimized out>, okfn=<optimized out>, in=<optimized out>, skb=<optimized out>, net=<optimized out>, hook=<optimized out>) at ./include/linux/netfilter.h:283
#26 ip_rcv (skb=0xffff88826f334800, dev=0xffff88826820c000, pt=<optimized out>, orig_dev=<optimized out>) at net/ipv4/ip_input.c:500
#27 0xffffffff816d57af in deliver_skb (orig_dev=<optimized out>, pt_prev=<optimized out>, skb=<optimized out>) at net/core/dev.c:1963
#28 deliver_ptype_list_skb (ptype_list=<optimized out>, type=<optimized out>, orig_dev=<optimized out>, pt=<optimized out>, skb=<optimized out>) at net/core/dev.c:1978
#29 __netif_receive_skb_core (skb=0xffff88826f334400, pfmemalloc=<optimized out>) at net/core/dev.c:4919
#30 0xffffffff816d85c2 in netif_receive_skb_internal (skb=<optimized out>) at net/core/dev.c:5071
#31 netif_receive_skb_internal (skb=0xffff88826911fc00) at net/core/dev.c:5036
#32 0xffffffff816d931a in napi_skb_finish (skb=<optimized out>, ret=GRO_NORMAL) at net/core/dev.c:5445
#33 napi_gro_receive (napi=0xffff88826820ce90, skb=0xffff88826f334400) at net/core/dev.c:5476
#34 0xffffffffa04a41a7 in e1000_receive_skb (adapter=<optimized out>, netdev=<optimized out>, skb=<optimized out>, staterr=<optimized out>, vlan=<optimized out>) at drivers/net/ethernet/intel/e1000e/netdev.c:564
-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑将接收到的skb(报文的载体)上送协议栈↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-
#35 0xffffffffa04a5849 in e1000_clean_rx_irq (rx_ring=0xffff88826f81e100, work_done=<optimized out>, work_to_do=<optimized out>) at drivers/net/ethernet/intel/e1000e/netdev.c:1025
-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑调用e1000e预设的接收回调↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-
#36 0xffffffffa04acc29 in e1000e_poll (napi=0xffff88826820ce90, budget=64) at drivers/net/ethernet/intel/e1000e/netdev.c:2670
-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑驱动的poll被唤醒↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-
#37 0xffffffff816d8a29 in napi_poll (repoll=<optimized out>, n=<optimized out>) at net/core/dev.c:6094
#38 net_rx_action (h=<optimized out>) at net/core/dev.c:6160
-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑调用napi设备通用回调↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-
#39 0xffffffff81c000e3 in __do_softirq () at kernel/softirq.c:292
-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑软中断启动↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-
#40 0xffffffff810ad8b0 in invoke_softirq () at kernel/softirq.c:373
-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑硬件中断进行简单的记录,调用软中断做后续处理↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-
#41 irq_exit () at kernel/softirq.c:413
#42 0xffffffff81a01e65 in exiting_irq () at ./arch/x86/include/asm/apic.h:536
#43 do_IRQ (regs=0xffffc90000ccfe08) at arch/x86/kernel/irq.c:258
#44 0xffffffff81a009cf in common_interrupt () at arch/x86/entry/entry_64.S:588
-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑硬件中断处理流程↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-
#45 0xffffc90000ccfe08 in ?? ()
#46 0x0000000000000000 in ?? ()
-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑网卡硬件触发中断↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-↑-


数据结构

e1000_buffer——对skb的封装

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/* Per-descriptor software state: wraps an skb together with its DMA mapping,
 * plus Tx- or Rx-specific bookkeeping (the union discriminates by ring type).
 */
struct e1000_buffer {
dma_addr_t dma; /* DMA address of the skb data; kernel helpers handle mapping between kernel virtual addresses and DMA addresses */
struct sk_buff *skb;
union {
/* Tx */
struct {
unsigned long time_stamp;
u16 length;
u16 next_to_watch;
unsigned int segs;
unsigned int bytecount;
u16 mapped_as_page;
};
/* Rx */
struct {
/* arrays of page information for packet split */
struct e1000_ps_page *ps_pages;
struct page *page;
};
};
};

union e1000_tx_desc——tx时与硬件交互的媒介

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/* Hardware Tx descriptor: the layout the NIC reads/writes over DMA.
 * `lower`/`upper` overlay raw 32-bit words with their field breakdown.
 */
struct e1000_tx_desc {
__le64 buffer_addr; /* Address of the descriptor's data buffer */
union {
__le32 data;
struct {
__le16 length; /* Data buffer length */
u8 cso; /* Checksum offset */
u8 cmd; /* Descriptor control */
} flags;
} lower;
union {
__le32 data;
struct {
u8 status; /* Descriptor status */
u8 css; /* Checksum start */
__le16 special;
} fields;
} upper;
};

union e1000_rx_desc_extended——rx时与硬件交互的媒介

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/* Hardware Rx descriptor (extended format). The driver fills `read`
 * (buffer address) for the NIC to consume; the NIC writes back `wb`
 * (status, length, checksum/RSS, VLAN) when reception completes.
 */
union e1000_rx_desc_extended {
struct {
__le64 buffer_addr;
__le64 reserved;
} read;
struct {
struct {
__le32 mrq; /* Multiple Rx Queues */
union {
__le32 rss; /* RSS Hash */
struct {
__le16 ip_id; /* IP id */
__le16 csum; /* Packet Checksum */
} csum_ip;
} hi_dword;
} lower;
struct {
__le32 status_error; /* ext status/error */
__le16 length;
__le16 vlan; /* VLAN tag */
} upper;
} wb; /* writeback */
};

ring数据结构——控制数据收发的环形数据结构

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/* Circular descriptor ring controlling packet transmit/receive. */
struct e1000_ring {
struct e1000_adapter *adapter; /* back-pointer to the vendor's private data structure (analyzed later) */
void *desc; /* descriptor ring memory area */
dma_addr_t dma; /* DMA address of the ring */
unsigned int size; /* length of the ring in bytes */
unsigned int count; /* number of descriptors in this ring */

u16 next_to_use; /* index of the next descriptor to hand a fresh buffer to */
u16 next_to_clean; /* index of the next descriptor whose buffer will be reclaimed */

void __iomem *head;
void __iomem *tail;

/* array of buffer information structs */
struct e1000_buffer *buffer_info;

char name[IFNAMSIZ + 5];
u32 ims_val;
u32 itr_val;
void __iomem *itr_register;
int set_itr;

struct sk_buff *rx_skb_top;
};

adapter——厂商为方便实现定义的数据结构

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
/* board specific private data structure */
struct e1000_adapter {
struct timer_list watchdog_timer;
struct timer_list phy_info_timer;
struct timer_list blink_timer;

struct work_struct reset_task;
struct work_struct watchdog_task;

const struct e1000_info *ei;

unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
u32 bd_number;
u32 rx_buffer_len;
u16 mng_vlan_id;
u16 link_speed;
u16 link_duplex;
u16 eeprom_vers;

/* track device up/down/testing state */
unsigned long state;

/* Interrupt Throttle Rate */
u32 itr;
u32 itr_setting;
u16 tx_itr;
u16 rx_itr;

/* Tx - one ring per active queue */
struct e1000_ring *tx_ring ____cacheline_aligned_in_smp; /* the transmit queue lives here */
u32 tx_fifo_limit;

struct napi_struct napi;

unsigned int uncorr_errors; /* uncorrectable ECC errors */
unsigned int corr_errors; /* correctable ECC errors */
unsigned int restart_queue;
u32 txd_cmd;

bool detect_tx_hung;
bool tx_hang_recheck;
u8 tx_timeout_factor;

u32 tx_int_delay;
u32 tx_abs_int_delay;

unsigned int total_tx_bytes;
unsigned int total_tx_packets;
unsigned int total_rx_bytes;
unsigned int total_rx_packets;

/* Tx stats */
u64 tpt_old;
u64 colc_old;
u32 gotc;
u64 gotc_old;
u32 tx_timeout_count;
u32 tx_fifo_head;
u32 tx_head_addr;
u32 tx_fifo_size;
u32 tx_dma_failed;
u32 tx_hwtstamp_timeouts;
u32 tx_hwtstamp_skipped;

/* Rx */
bool (*clean_rx)(struct e1000_ring *ring, int *work_done,
int work_to_do) ____cacheline_aligned_in_smp;
void (*alloc_rx_buf)(struct e1000_ring *ring, int cleaned_count,
gfp_t gfp);
struct e1000_ring *rx_ring; /* the receive queue lives here */

u32 rx_int_delay;
u32 rx_abs_int_delay;

/* Rx stats */
u64 hw_csum_err;
u64 hw_csum_good;
u64 rx_hdr_split;
u32 gorc;
u64 gorc_old;
u32 alloc_rx_buff_failed;
u32 rx_dma_failed;
u32 rx_hwtstamp_cleared;

unsigned int rx_ps_pages;
u16 rx_ps_bsize0;
u32 max_frame_size;
u32 min_frame_size;

/* OS defined structs */
struct net_device *netdev;
struct pci_dev *pdev;

/* structs defined in e1000_hw.h */
struct e1000_hw hw;

spinlock_t stats64_lock; /* protects statistics counters */
struct e1000_hw_stats stats;
struct e1000_phy_info phy_info;
struct e1000_phy_stats phy_stats;

/* Snapshot of PHY registers */
struct e1000_phy_regs phy_regs;

struct e1000_ring test_tx_ring;
struct e1000_ring test_rx_ring;
u32 test_icr;

u32 msg_enable;
unsigned int num_vectors;
struct msix_entry *msix_entries;
int int_mode;
u32 eiac_mask;

u32 eeprom_wol;
u32 wol;
u32 pba;
u32 max_hw_frame_size;

bool fc_autoneg;

unsigned int flags;
unsigned int flags2;
struct work_struct downshift_task;
struct work_struct update_phy_task;
struct work_struct print_hang_task;

int phy_hang_count;

u16 tx_ring_count;
u16 rx_ring_count;

struct hwtstamp_config hwtstamp_config;
struct delayed_work systim_overflow_work;
struct sk_buff *tx_hwtstamp_skb;
unsigned long tx_hwtstamp_start;
struct work_struct tx_hwtstamp_work;
spinlock_t systim_lock; /* protects SYSTIML/H registers */
struct cyclecounter cc;
struct timecounter tc;
struct ptp_clock *ptp_clock;
struct ptp_clock_info ptp_clock_info;
struct pm_qos_request pm_qos_req;
s32 ptp_delta;

u16 eee_advert;
};

主要流程分析

e1000_clean_rx_irq

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
/**
 * e1000_clean_rx_irq - receive data from the NIC and push it up the network
 * stack; "clean" here means servicing the receive event — process one, clean one
 * @rx_ring: Rx descriptor ring
 * @work_done: output parameter: reports how many items were actually processed
 * @work_to_do: budget; the driver must not process more items than this in one pass
 **/

static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done, int work_to_do)
{
struct e1000_adapter *adapter = rx_ring->adapter;
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
struct e1000_hw *hw = &adapter->hw;
union e1000_rx_desc_extended *rx_desc, *next_rxd;
struct e1000_buffer *buffer_info, *next_buffer;
u32 length, staterr;
unsigned int i;
int cleaned_count = 0;
bool cleaned = false;
unsigned int total_rx_bytes = 0, total_rx_packets = 0;

i = rx_ring->next_to_clean; /* next_to_clean was updated at the end of the previous pass; everything before it has been received and its resources refreshed */
rx_desc = E1000_RX_DESC_EXT(*rx_ring, i); /* fetch the Rx descriptor at index i, i.e. the next clean point */
staterr = le32_to_cpu(rx_desc->wb.upper.status_error); /* convert from device byte order to CPU byte order */
buffer_info = &rx_ring->buffer_info[i]; /* descriptors and buffers are 1:1; fetch the buffer at the next clean point */

while (staterr & E1000_RXD_STAT_DD) { /* while e1000_rx_desc_extended says reception at index i is done (Descriptor Done) */
struct sk_buff *skb;

if (*work_done >= work_to_do) /* check whether we already hit the processing budget */
break;
(*work_done)++; /* still under the budget, keep receiving */
dma_rmb(); /* read barrier before touching DMA'd data, to guarantee a consistent view of the descriptor */

skb = buffer_info->skb; // take the skb wrapped in this buffer
buffer_info->skb = NULL; // then break the link between the buffer and the skb

// the code below exists for prefetching ↓
prefetch(skb->data - NET_IP_ALIGN); // manually trigger the CPU prefetch machinery on the skb data
i++; // look at the next buffer too, prefetching it where useful
if (i == rx_ring->count) // wrap around at the end of the ring
i = 0;
next_rxd = E1000_RX_DESC_EXT(*rx_ring, i);
prefetch(next_rxd);
next_buffer = &rx_ring->buffer_info[i];
// the code above exists for prefetching ↑

cleaned = true;
cleaned_count++;
dma_unmap_single(&pdev->dev, buffer_info->dma, // buffer_info->dma maps the skb above for DMA; tear that mapping down too
adapter->rx_buffer_len, DMA_FROM_DEVICE);
buffer_info->dma = 0;

length = le16_to_cpu(rx_desc->wb.upper.length); // the hardware writes into the descriptor how many bytes were received


// if E1000_RXD_STAT_EOP is not set, the packet was too big and spans multiple descriptors
if (unlikely(!(staterr & E1000_RXD_STAT_EOP)))
adapter->flags2 |= FLAG2_IS_DISCARDING; // record this condition in the adapter
// later iterations use the flag recorded above to know they are still inside a multi-descriptor packet whose EOP bit has not yet been seen
if (adapter->flags2 & FLAG2_IS_DISCARDING) {
/* All receives must fit into a single buffer */
e_dbg("Receive packet consumed multiple buffers\n");
/* recycle */
buffer_info->skb = skb; // hang the skb back onto the buffer
if (staterr & E1000_RXD_STAT_EOP) // only once the final descriptor with E1000_RXD_STAT_EOP set arrives
adapter->flags2 &= ~FLAG2_IS_DISCARDING; // do we clear the flag recorded earlier
goto next_desc; // packets received while FLAG2_IS_DISCARDING is set are dropped; their buffers' skbs are not reclaimed either
// NOTE(review): per the author, the data would arrive via the last skb; DMA mapping of the re-hung skb happens in alloc_rx_buf — confirm
}
// error path: the received data is bad
if (unlikely((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) &&
!(netdev->features & NETIF_F_RXALL))) {
/* recycle */
buffer_info->skb = skb; // recycle the skb for reuse
goto next_desc;
}

/* adjust length to remove Ethernet CRC */
if (!(adapter->flags2 & FLAG2_CRC_STRIPPING)) {
/* If configured to store CRC, don't subtract FCS,
* but keep the FCS bytes out of the total_rx_bytes
* counter
*/
if (netdev->features & NETIF_F_RXFCS)
total_rx_bytes -= 4;
else
length -= 4;
}

total_rx_bytes += length;
total_rx_packets++;

/* code added for copybreak, this should improve
* performance for small packets with large amounts
* of reassembly being done in the stack
*/
if (length < copybreak) {
struct sk_buff *new_skb =
napi_alloc_skb(&adapter->napi, length);
if (new_skb) {
skb_copy_to_linear_data_offset(new_skb,
-NET_IP_ALIGN,
(skb->data -
NET_IP_ALIGN),
(length +
NET_IP_ALIGN));
/* save the skb in buffer_info as good */
buffer_info->skb = skb;
skb = new_skb;
}
/* else just continue with the old one */
}
/* end copybreak code */
skb_put(skb, length); // advance the skb data-length pointer; see skb internals for the field meanings

/* Receive Checksum Offload */
e1000_rx_checksum(adapter, staterr, skb); // the hardware already verified the checksum; the driver only inspects and records the result

e1000_rx_hash(netdev, rx_desc->wb.lower.hi_dword.rss, skb); // presumably records the hardware-computed RSS hash in the skb — TODO confirm

e1000_receive_skb(adapter, netdev, skb, staterr, // hand the skb just received up to the network stack
rx_desc->wb.upper.vlan);

next_desc:
rx_desc->wb.upper.status_error &= cpu_to_le32(~0xFF);

/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= E1000_RX_BUFFER_WRITE) { // if many descriptors were consumed the hardware may run short; replenish early
adapter->alloc_rx_buf(rx_ring, cleaned_count,
GFP_ATOMIC);
cleaned_count = 0;
}

/* the next-descriptor prefetch done earlier pays off here */
rx_desc = next_rxd;
buffer_info = next_buffer;

staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
}
rx_ring->next_to_clean = i; // once the data buffers are processed, advance next_to_clean past the last received buffer
/*
* static int e1000_desc_unused(struct e1000_ring *ring)
* {
* if (ring->next_to_clean > ring->next_to_use)
* return ring->next_to_clean - ring->next_to_use - 1;
* // 0|------------------|next_to_use********************|next_to_clean------------------------------|count
* return ring->count + ring->next_to_clean - ring->next_to_use - 1;
* // 0|**********|next_to_clean----------|next_to_use************************************************|count
* }
*/
cleaned_count = e1000_desc_unused(rx_ring); // use next_to_use and ↑next_to_clean↑ to count how many buffers need replenishing after this pass; some may already have been replenished mid-loop, hence the index arithmetic

// the skbs of the buffers processed above were taken by the upper layer; re-hang fresh skbs on those buffers
if (cleaned_count)
adapter->alloc_rx_buf(rx_ring, cleaned_count, GFP_ATOMIC);

adapter->total_rx_bytes += total_rx_bytes; // update the adapter's running total of received bytes
adapter->total_rx_packets += total_rx_packets; // update the adapter's running total of received packets
return cleaned;
}

e1000_xmit_frame

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/* e1000_xmit_frame - ndo_start_xmit hook: map an skb onto Tx descriptors and
 * kick the hardware. Returns NETDEV_TX_OK (skb consumed or freed) or
 * NETDEV_TX_BUSY when the ring lacks room, asking the stack to retry.
 */
static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
struct net_device *netdev)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_ring *tx_ring = adapter->tx_ring;
unsigned int first;
unsigned int tx_flags = 0;
unsigned int len = skb_headlen(skb);
unsigned int nr_frags;
unsigned int mss;
int count = 0;
int tso;
unsigned int f;
__be16 protocol = vlan_get_protocol(skb); // which link-layer protocol; if a VLAN tag is present, looks through it

if (test_bit(__E1000_DOWN, &adapter->state)) { // if the interface is currently down
dev_kfree_skb_any(skb); // free the skb (frees the memory if we are the sole user; otherwise just drops a reference)
return NETDEV_TX_OK;
}

if (skb->len <= 0) { // malformed packet
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}

/* The minimum packet size with TCTL.PSP set is 17 bytes so
* pad skb in order to meet this minimum size requirement
*/
if (skb_put_padto(skb, 17)) // pad undersized packets; the link layer has a minimum frame length
return NETDEV_TX_OK;

mss = skb_shinfo(skb)->gso_size; // segmentation size for TSO
if (mss) {
u8 hdr_len;

/* TSO Workaround for 82571/2/3 Controllers -- if skb->data
* points to just header, pull a few bytes of payload from
* frags into skb->data
*/
hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
/* we do this workaround for ES2LAN, but it is un-necessary,
* avoiding it could save a lot of cycles
*/
if (skb->data_len && (hdr_len == len)) {
unsigned int pull_size;

pull_size = min_t(unsigned int, 4, skb->data_len);
if (!__pskb_pull_tail(skb, pull_size)) {
e_err("__pskb_pull_tail failed.\n");
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
len = skb_headlen(skb);
}
}

/* reserve a descriptor for the offload context */
if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL))
count++;
count++;

count += DIV_ROUND_UP(len, adapter->tx_fifo_limit);

nr_frags = skb_shinfo(skb)->nr_frags;
for (f = 0; f < nr_frags; f++)
count += DIV_ROUND_UP(skb_frag_size(&skb_shinfo(skb)->frags[f]),
adapter->tx_fifo_limit);

if (adapter->hw.mac.tx_pkt_filtering)
e1000_transfer_dhcp_info(adapter, skb);

/* need: count + 2 desc gap to keep tail from touching
* head, otherwise try next time
*/
if (e1000_maybe_stop_tx(tx_ring, count + 2))
return NETDEV_TX_BUSY;

if (skb_vlan_tag_present(skb)) {
tx_flags |= E1000_TX_FLAGS_VLAN;
tx_flags |= (skb_vlan_tag_get(skb) <<
E1000_TX_FLAGS_VLAN_SHIFT);
}

first = tx_ring->next_to_use;

tso = e1000_tso(tx_ring, skb, protocol);
if (tso < 0) {
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}

if (tso)
tx_flags |= E1000_TX_FLAGS_TSO;
else if (e1000_tx_csum(tx_ring, skb, protocol))
tx_flags |= E1000_TX_FLAGS_CSUM;

/* Old method was to assume IPv4 packet by default if TSO was enabled.
* 82571 hardware supports TSO capabilities for IPv6 as well...
* no longer assume, we must.
*/
if (protocol == htons(ETH_P_IP))
tx_flags |= E1000_TX_FLAGS_IPV4;

if (unlikely(skb->no_fcs))
tx_flags |= E1000_TX_FLAGS_NO_FCS;

/* if count is 0 then mapping error has occurred */
count = e1000_tx_map(tx_ring, skb, first, adapter->tx_fifo_limit,
nr_frags);
if (count) {
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
(adapter->flags & FLAG_HAS_HW_TIMESTAMP)) {
if (!adapter->tx_hwtstamp_skb) {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
tx_flags |= E1000_TX_FLAGS_HWTSTAMP;
adapter->tx_hwtstamp_skb = skb_get(skb);
adapter->tx_hwtstamp_start = jiffies;
schedule_work(&adapter->tx_hwtstamp_work);
} else {
adapter->tx_hwtstamp_skipped++;
}
}

skb_tx_timestamp(skb);

netdev_sent_queue(netdev, skb->len);
e1000_tx_queue(tx_ring, tx_flags, count);
/* Make sure there is space in the ring for the next send. */
e1000_maybe_stop_tx(tx_ring,
(MAX_SKB_FRAGS *
DIV_ROUND_UP(PAGE_SIZE,
adapter->tx_fifo_limit) + 2));

if (!skb->xmit_more ||
netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) {
if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA)
e1000e_update_tdt_wa(tx_ring,
tx_ring->next_to_use);
else
writel(tx_ring->next_to_use, tx_ring->tail);

/* we need this if more than one processor can write
* to our tail at a time, it synchronizes IO on
*IA64/Altix systems
*/
mmiowb();
}
} else {
dev_kfree_skb_any(skb);
tx_ring->buffer_info[first].time_stamp = 0;
tx_ring->next_to_use = first;
}

return NETDEV_TX_OK;
}