本文将介绍一个 Linux 下网络设备驱动的简单实现,代码基于《Linux Device Drivers》中的 snull 驱动,介绍该网络设备的注册、打开、写入数据、读出数据等操作,所有示例代码可以在我的 Github 中找到。

设备注册与卸载

基本流程

每个网卡,无论是物理还是虚拟的网卡,都必须有一个 net_device 结构体,这个结构体是在网卡驱动中动态分配创建的,网卡驱动会为每个新的网络接口在一个全局的网络设备列表里插入本网卡对应的 net_device。

初始化 snull 模块需要先创建 net_device 网络设备,正如在 net_device 这篇博客 中所述,设备初始化流程如下所示:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/* The two snull interfaces; the slots are filled in by snull_init_module(). */
struct net_device *snull_devs[2];

/*
 * Module entry point: allocate both snull interfaces and register them.
 *
 * Returns 0 when at least one interface was registered, -ENOMEM when an
 * allocation failed, and -ENODEV when neither interface could be
 * registered. On any non-zero result snull_cleanup() runs before returning.
 */
int snull_init_module(void)
{
    int idx;
    int err;
    int status = -ENOMEM;

    /* Select the interrupt handler that matches the receive mode. */
    if (use_napi)
        snull_interrupt = snull_napi_interrupt;
    else
        snull_interrupt = snull_regular_interrupt;

    /* Both allocations are attempted before either result is inspected. */
    snull_devs[0] = alloc_netdev(sizeof(struct snull_priv), "sn%d",
                                 NET_NAME_UNKNOWN, snull_init);
    snull_devs[1] = alloc_netdev(sizeof(struct snull_priv), "sn%d",
                                 NET_NAME_UNKNOWN, snull_init);
    if (!snull_devs[0] || !snull_devs[1])
        goto out;

    /* From here on, failure means "no device could be registered". */
    status = -ENODEV;
    for (idx = 0; idx < 2; idx++) {
        err = register_netdev(snull_devs[idx]);
        if (err) {
            printk("snull: error %i registering device \"%s\"\n",
                   err, snull_devs[idx]->name);
            continue;
        }
        /* One successful registration is enough to report success. */
        status = 0;
    }

out:
    if (status)
        snull_cleanup();
    return status;
}

在卸载 snull 模块时,执行操作如下:

  • unregister_netdev 从系统中删除了接口
  • snull 自己的清除函数
  • free_netdev 将 net_device 结构体返回给系统。如果在其他地方还存在对该设备的引用,则它将继续存在,但是驱动程序并不需要关注这一点。
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
/*
 * Release both snull interfaces.
 *
 * For every allocated slot in snull_devs: unregister the interface, free
 * its packet pool, and hand the net_device back with free_netdev(). The
 * slot is then cleared so the stale pointer can never be unregistered or
 * freed a second time.
 *
 * NOTE(review): this function is also reached from the error path of
 * snull_init_module(), where a device may have been allocated but never
 * registered -- confirm that unregister_netdev() tolerates that on the
 * target kernel.
 */
void snull_cleanup(void)
{
    int i;

    for (i = 0; i < 2; i++) {
        struct net_device *dev = snull_devs[i];

        if (!dev)
            continue;

        unregister_netdev(dev);
        snull_teardown_pool(dev);
        free_netdev(dev); /* will call netif_napi_del() */

        /* Do not leave a dangling pointer behind in the global table. */
        snull_devs[i] = NULL;
    }
}

初始化设备

在申请 net_device 时使用了 snull_init 函数,其主要用于初始化设备,这里主要是对 net_device 的一些成员进行了初始化。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
/*
 * The init function (sometimes called probe).
 * It is passed to alloc_netdev() in snull_init_module() and fills in the
 * net_device together with its private area.
 */
void snull_init(struct net_device *dev)
{
    struct snull_priv *priv = netdev_priv(dev);

    /*
     * A real driver would make the usual checks here: check_region(),
     * probe irq, ...  No resource should be grabbed: this is done on
     * open().
     */

    /* Start from the Ethernet defaults, then override fields by hand. */
    ether_setup(dev);
    dev->netdev_ops = &snull_netdev_ops;
    dev->header_ops = &snull_header_ops;
    dev->watchdog_timeo = timeout;
    dev->flags |= IFF_NOARP;          /* keep the default flags, just add NOARP */
    dev->features |= NETIF_F_HW_CSUM;

    /*
     * Reset the private area (statistics plus a few private fields)
     * before any of its members are set up.
     */
    memset(priv, 0, sizeof(*priv));
    if (use_napi)
        netif_napi_add(dev, &priv->napi, snull_poll, 2);
    spin_lock_init(&priv->lock);
    priv->dev = dev;

    snull_rx_ints(dev, 1); /* enable receive interrupts */
    snull_setup_pool(dev);
}

在这里面,通过给 netdev_ops 成员赋值,注册了 snull 模块自己的网络设备操作函数:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
/* Network device operations implemented by snull; installed in snull_init(). */
static const struct net_device_ops snull_netdev_ops = {
    /* bringing the interface up and down */
    .ndo_open = snull_open,
    .ndo_stop = snull_release,

    /* transmit path */
    .ndo_start_xmit = snull_tx,
    .ndo_tx_timeout = snull_tx_timeout,

    /* configuration and statistics */
    .ndo_change_mtu = snull_change_mtu,
    .ndo_set_config = snull_config,
    .ndo_do_ioctl = snull_ioctl,
    .ndo_get_stats = snull_stats,
};

Private Data

snull_init 中初始化了 priv 字段,该成员的作用和字符驱动程序中的 private_data 指针的作用类似。出于性能和灵活性方面的考虑,不鼓励直接访问 priv 成员。当驱动程序需要访问私有数据指针时,应当使用 netdev_priv 函数,如上所示。

1
/* Obtain the driver-private data of dev through the netdev_priv() accessor. */
struct snull_priv *priv = netdev_priv(dev);

下面展示了 snull 模块中具体的私有数据的数据结构,这个结构包含了一个 net_device_stats 结构的实例,它是保存接口统计信息的标准地方。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/*
 * A structure representing an in-flight packet.
 * Instances are allocated by snull_setup_pool() and linked through `next`.
 */
struct snull_packet {
    struct snull_packet *next;  /* next packet on the list this one is linked into */
    struct net_device *dev;     /* device the packet was allocated for */
    int    datalen;             /* presumably the number of valid bytes in data -- TODO confirm */
    u8 data[ETH_DATA_LEN];      /* packet data buffer, ETH_DATA_LEN bytes */
};

/*
 * This structure is private to each device. It is used to pass
 * packets in and out, so there is place for a packet.
 * Driver code reaches it through netdev_priv().
 */
struct snull_priv {
    struct net_device_stats stats;  /* interface statistics */
    int status;                     /* NOTE(review): usage not visible in this excerpt */
    struct snull_packet *ppool;     /* head of the packet pool built by snull_setup_pool() */
    struct snull_packet *rx_queue;  /* List of incoming packets */
    int rx_int_enabled;             /* presumably set through snull_rx_ints() -- TODO confirm */
    int tx_packetlen;               /* presumably length of the packet being sent -- TODO confirm */
    u8 *tx_packetdata;              /* presumably data of the packet being sent -- TODO confirm */
    struct sk_buff *skb;            /* skb remembered by snull_tx() for freeing at interrupt time */
    spinlock_t lock;                /* initialised in snull_init() */
    struct net_device *dev;         /* back-pointer to the owning net_device */
    struct napi_struct napi;        /* NAPI context, added in snull_init() when use_napi is set */
};

Packet Pool

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
/*
 * Build the packet pool of a device.
 *
 * Allocates up to pool_size packets and pushes each one onto the front of
 * priv->ppool. If an allocation fails a notice is logged and the function
 * returns, leaving the packets allocated so far in the pool.
 */
void snull_setup_pool(struct net_device *dev)
{
    struct snull_priv *priv = netdev_priv(dev);
    struct snull_packet *pkt;
    int n;

    priv->ppool = NULL;
    for (n = 0; n < pool_size; n++) {
        pkt = kmalloc(sizeof(*pkt), GFP_KERNEL);
        if (!pkt) {
            printk(KERN_NOTICE "Ran out of memory allocating packet pool\n");
            return;
        }

        /* Link the new packet in at the head of the pool. */
        pkt->dev = dev;
        pkt->next = priv->ppool;
        priv->ppool = pkt;
    }
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
/*
 * Free every packet that is currently linked into the device's pool,
 * leaving priv->ppool empty.
 */
void snull_teardown_pool(struct net_device *dev)
{
    struct snull_priv *priv = netdev_priv(dev);
    struct snull_packet *head;

    for (head = priv->ppool; head != NULL; head = priv->ppool) {
        /* Unlink the head before releasing it. */
        priv->ppool = head->next;
        kfree(head);
        /* FIXME - in-flight packets ? */
    }
}

设备打开与关闭

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
/*
 * Open the interface: assign its hardware address, enable NAPI when
 * use_napi is set, and start the transmit queue. Always returns 0.
 */
int snull_open(struct net_device *dev)
{
    /* request_region(), request_irq(), ....  (like fops->open) */

    /*
     * The hardware address of the board is "\0SNULx", where x is 0 or 1.
     * The leading '\0' keeps it from being a multicast address (the
     * first byte of multicast addrs is odd).
     */
    memcpy(dev->dev_addr, "\0SNUL0", ETH_ALEN);
    if (snull_devs[1] == dev)
        dev->dev_addr[ETH_ALEN - 1] += 1; /* \0SNUL1 */

    if (use_napi) {
        struct snull_priv *priv = netdev_priv(dev);

        napi_enable(&priv->napi);
    }

    netif_start_queue(dev);
    return 0;
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
/*
 * Close the interface: stop the transmit queue and, when use_napi is
 * set, disable NAPI. Always returns 0.
 */
int snull_release(struct net_device *dev)
{
    struct snull_priv *priv;

    /* release ports, irq and such -- like fops->close */

    netif_stop_queue(dev); /* can't transmit any more */

    if (!use_napi)
        return 0;

    priv = netdev_priv(dev);
    napi_disable(&priv->napi);
    return 0;
}

数据包发送

基本流程

当内核想要发送一个数据包时,会调用驱动程序的 ndo_start_xmit 函数(snull 中为 snull_tx)将数据放入外发队列。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/*
 * Transmit a packet (called by the kernel).
 *
 * Frames shorter than ETH_ZLEN are copied into a zero-filled buffer on the
 * stack so that ETH_ZLEN bytes are handed to snull_hw_tx(). The skb is
 * stored in the private data so it can be freed at interrupt time.
 *
 * Always returns 0.
 */
int snull_tx(struct sk_buff *skb, struct net_device *dev)
{
    int len;
    char *data, shortpkt[ETH_ZLEN];
    struct snull_priv *priv = netdev_priv(dev);

    data = skb->data;
    len = skb->len;
    if (len < ETH_ZLEN) {
        /* Pad the short frame with zeros and transmit the padded copy. */
        memset(shortpkt, 0, ETH_ZLEN);
        memcpy(shortpkt, skb->data, skb->len);
        len = ETH_ZLEN;
        data = shortpkt;
    }
    netif_trans_update(dev);

    /* Remember the skb, so we can free it at interrupt time */
    priv->skb = skb;

    /* actual deliver of data is device-specific, and not shown here */
    snull_hw_tx(data, len, dev);

    return 0; /* Our simple device can not fail */
}

控制并发传输

传输超时

Scatter/Gather I/O

数据包接收

中断处理例程

NAPI 接收

链路状态的改变

MAC 地址解析

定制 ioctl 信息

参考资料