Linux虚拟网卡TUN/TAP

2024-02-05 20:58
文章标签 linux 虚拟 网卡 tap tun

本文主要是介绍Linux虚拟网卡TUN/TAP,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!

Linux虚拟网卡TUN/TAP



TUN/TAP 为用户空间程序提供了数据包的接收和发送能力,可以把它看成一个简单的点对点设备(TUN)
或以太网设备(TAP)。它不是从物理设备接收包,而是接收用户空间程序写入的包;它也不是通过物
理设备发送包,而是把要发送的包交给用户空间程序读取。
为了使用这个驱动,应用程序需要打开 /dev/net/tun 设备(字符设备),然后发出一个控
制调用(ioctl)来注册一个网卡设备。该网络设备将被命名为 tunXX 或 tapXX,具体取决于你所设定的标志
位。当应用程序关闭文件描述符的时候,该网络设备和与它相关的路由都将消失。
根据所选择的设备类型,用户空间的应用程序需要读写 IP 包(用 tun 设备)或以太网帧(用
tap 设备)。至于具体使用哪种设备,取决于传递给 ioctl 函数的标志参数。
Tun/tap 设备的源码包地址是http://vtun.sourceforge.net/tun

包含两个简单的例子,用于显示如何使用tun 设备和tap 设备。两个程序就像是这两个网
络设备接口间的网桥。
br_select.c ‐ bridge based on select system call.
br_sigio.c ‐ bridge based on async io and SIGIO signal.
当然,最好的例子是 VTun http://vtun.sourceforge.net :))

/* Register tun_init() and tun_cleanup() as the module's init and exit hooks. */
module_init(tun_init);
module_exit(tun_cleanup);
/* Network device part of the driver */
/* List head named tun_dev_list; its users are not part of this excerpt. */
static LIST_HEAD(tun_dev_list);
/* Declared here without an initializer; the definition is not part of this excerpt. */
static const struct ethtool_ops tun_ethtool_ops;

主要的数据结构
struct miscdevice
/*
 * Descriptor of one misc character device, as passed to misc_register().
 */
struct miscdevice {
/* Minor number; misc_register() replaces MISC_DYNAMIC_MINOR with a free one. */
int minor;
/* Name handed to device_create() when the device node is created. */
const char *name;
/* File operations of the character device. */
const struct file_operations *fops;
/* Linkage into misc_list (initialised and added by misc_register()). */
struct list_head list;
/* Parent device handed to device_create(). */
struct device *parent;
/* Result of device_create(), stored by misc_register(). */
struct device *this_device;
};
struct tun_struct
struct tun_struct {
struct list_head list;
unsigned long flags;// //区分tun 和tap 设备

int attached;
uid_t owner;
wait_queue_head_t read_wait;// //等待队列

struct sk_buff_head readq; // //网络缓冲区队列

struct net_device *dev; // //linux 抽象网络设备结构(结构是linux 内核提供的

统一网络设备结构,定义了系统统一的访问接口。)
struct net_device_stats stats; // //网卡状态信息结构

struct fasync_struct *fasync;// //文件异步通知结构

unsigned long if_flags;
u8 dev_addr[ETH_ALEN];
u32 chr_filter[2];
u32 net_filter[2];
#ifdef TUN_DEBUG
int debug;
#endif
};
struct ifreq
/*
* Interface request structure used for socket
* ioctl's. All interface ioctl's must have parameter
* definitions which begin with ifr_name. The
* remainder may be interface specific.
*/
/*
 * Request structure exchanged with user space by the interface ioctls:
 * an interface name followed by a request-specific payload.
 *
 * NOTE(review): the tun ioctl code accesses ifr_name, ifr_flags and
 * ifr_hwaddr; these are presumably accessor macros for members of the
 * two unions below (the macros are not part of this excerpt).
 */
struct ifreq
{
#define IFHWADDRLEN 6
/* First part: the interface name. */
union
{
char ifrn_name[IFNAMSIZ]; /* if name, e.g. "en0" */
} ifr_ifrn;
/* Second part: request-specific payload; the members share storage. */
union {
struct sockaddr ifru_addr;
struct sockaddr ifru_dstaddr;
struct sockaddr ifru_broadaddr;
struct sockaddr ifru_netmask;
struct sockaddr ifru_hwaddr;
short ifru_flags;
int ifru_ivalue;
int ifru_mtu;
struct ifmap ifru_map;
char ifru_slave[IFNAMSIZ]; /* Just fits the size */
char ifru_newname[IFNAMSIZ];
void __user * ifru_data;
struct if_settings ifru_settings;
} ifr_ifru;
};
模块的初始化(tun_init)
/*
 * Module init hook: print the driver banner and register the tun misc
 * character device.
 *
 * Returns 0 on success, otherwise the error returned by misc_register().
 */
static int __init tun_init(void)
{
	int ret = 0;

	printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
	printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT);

	ret = misc_register(&tun_miscdev);
	if (ret)
		printk(KERN_ERR "tun: Can't register misc device %d\n", TUN_MINOR);
	return ret;
}
/* Misc-device descriptor that tun_init() hands to misc_register(). */
static struct miscdevice tun_miscdev = {
	.name	= "tun",
	.minor	= TUN_MINOR,
	.fops	= &tun_fops,
};
/* File operations of the tun character device, grouped by purpose. */
static const struct file_operations tun_fops = {
	.owner		= THIS_MODULE,

	/* open / close */
	.open		= tun_chr_open,
	.release	= tun_chr_close,

	/* data path */
	.read		= do_sync_read,
	.aio_read	= tun_chr_aio_read,
	.write		= do_sync_write,
	.aio_write	= tun_chr_aio_write,
	.poll		= tun_chr_poll,
	.fasync		= tun_chr_fasync,

	/* control path */
	.ioctl		= tun_chr_ioctl,
	.llseek		= no_llseek,
};
misc_register
// 在内核中利用 misc_register() 函数将该驱动注册为非标准(misc)字符设备驱动,提供字符设备具有的各种程序接口。
int misc_register(struct miscdevice * misc)
{
struct miscdevice *c;
dev_t dev;
int err = 0;
INIT_LIST_HEAD(&misc‐>list);
mutex_lock(&misc_mtx);
list_for_each_entry(c, &misc_list, list) {
if (c‐>minor == misc‐>minor) {
mutex_unlock(&misc_mtx);
return ‐EBUSY;
}
}
if (misc‐>minor == MISC_DYNAMIC_MINOR) {
int i = DYNAMIC_MINORS;
while (‐‐i >= 0)
if ( (misc_minors[i>>3] & (1 << (i&7))) == 0)
break;
if (i<0) {
mutex_unlock(&misc_mtx);
return ‐EBUSY;
}
misc‐>minor = i;
}
if (misc‐>minor < DYNAMIC_MINORS)
misc_minors[misc‐>minor >> 3] |= 1 << (misc‐>minor & 7);
dev = MKDEV(MISC_MAJOR, misc‐>minor);
misc‐>this_device = device_create(misc_class, misc‐>parent, dev,
"%s", misc‐>name);
if (IS_ERR(misc‐>this_device)) {
err = PTR_ERR(misc‐>this_device);
goto out;
}
/*
* Add it to the front, so that later devices can "override"
* earlier defaults
*/
list_add(&misc‐>list, &misc_list);
out:
mutex_unlock(&misc_mtx);
return err;
}
tun 设备的操作(系统调用)
tun_chr_open(打开设备时调用)
当打开一个 tun/tap 设备时,open 系统调用将调用 tun_chr_open() 函数,完成打开时的初始化。
在下面列出的这一版本代码中,它只是把 file->private_data 置为 NULL(表示该文件尚未绑定任何网卡);
网络缓冲区链表和等待队列的初始化并不在这个函数中完成。
static int tun_chr_open(struct inode *inode, struct file * file)
{
DBG1(KERN_INFO "tunX: tun_chr_open/n");
file‐>private_data = NULL;//初始化设备文件的内容

return 0;
}
tun_chr_ioctl(设备的控制调用接口)
控制调用接口:
Cmd=
.. TUNSETIFF
.. _IOC_TYPE(cmd) == 0x89
.. TUNSETNOCSUM
.. TUNSETPERSIST
.. TUNSETOWNER
.. TUNSETLINK
.. TUNSETDEBUG
.. SIOCGIFFLAGS
.. SIOCSIFFLAGS
.. SIOCGIFHWADDR
.. SIOCSIFHWADDR
.. SIOCADDMULTI
.. SIOCDELMULTI
Tun/tap 驱动中网卡的注册被嵌入了字符驱动的ioctl 例程中,它是通过对字符设备文件描述符利用自
定义的ioctl 设置标志 TUNSETIFF 完成网卡的注册的。
static int tun_chr_ioctl(struct inode *inode, struct file *file,unsigned int cmd, unsigned long arg)
{
struct tun_struct *tun = file‐>private_data;
void __user* argp = (void __user*)arg;
struct ifreq ifr;
if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
if (copy_from_user(&ifr, argp, sizeof ifr))//拷贝用户区的网络设备配置。在用户区已

经分配了ifreq 结构的值和配置值,
return ‐EFAULT;
if (cmd == TUNSETIFF && !tun) {//字符设备文件的数据不是空的则

int err;
ifr.ifr_name[IFNAMSIZ‐] = '/0';
rtnl_lock();//在<linux/rlnetlink.h>中定义

err = tun_set_iff(file, &ifr);
rtnl_unlock();
if (err)
return err;
if (copy_to_user(argp, &ifr, sizeof(ifr)))//把配置数据拷贝到用户区

return ‐EFAULT;
return 0;
}
if (!tun)//tun 设备错误

return ‐EBADFD;
DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d/n", tun‐>dev‐>name, cmd);
switch (cmd) {
case TUNSETNOCSUM:
/* Disable/Enable checksum */
if (arg)
tun‐>flags |= TUN_NOCHECKSUM;
else
tun‐>flags &= ~TUN_NOCHECKSUM;
DBG(KERN_INFO "%s: checksum %s/n",
tun‐>dev‐>name, arg ? "disabled" : "enabled");
break;
case TUNSETPERSIST:
/* Disable/Enable persist mode */
if (arg)
tun‐>flags |= TUN_PERSIST;
else
tun‐>flags &= ~TUN_PERSIST;
DBG(KERN_INFO "%s: persist %s/n",
tun‐>dev‐>name, arg ? "disabled" : "enabled");
break;
case TUNSETOWNER:
/* Set owner of the device */
tun‐>owner = (uid_t) arg;
DBG(KERN_INFO "%s: owner set to %d/n", tun‐>dev‐>name, tun‐>owner);
break;
case TUNSETLINK:
/* Only allow setting the type when the interface is down */
if (tun‐>dev‐>flags & IFF_UP) {
DBG(KERN_INFO "%s: Linktype set failed because interface is up/n",
tun‐>dev‐>name);
return ‐EBUSY;
} else {
tun‐>dev‐>type = (int) arg;
DBG(KERN_INFO "%s: linktype set to %d/n", tun‐>dev‐>name, tun‐>dev‐>type);
}
break;
#ifdef TUN_DEBUG
case TUNSETDEBUG:
tun‐>debug = arg;
break;
#endif
case SIOCGIFFLAGS:
ifr.ifr_flags = tun‐>if_flags;
if (copy_to_user( argp, &ifr, sizeof ifr))
return ‐EFAULT;
return 0;
case SIOCSIFFLAGS:
/** Set the character device's interface flags. Currently only
* IFF_PROMISC and IFF_ALLMULTI are used. */
tun‐>if_flags = ifr.ifr_flags;
DBG(KERN_INFO "%s: interface flags 0x%lx/n",
tun‐>dev‐>name, tun‐>if_flags);
return 0;
case SIOCGIFHWADDR:
/* Note: the actual net device's address may be different */
memcpy(ifr.ifr_hwaddr.sa_data, tun‐>dev_addr,
min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun‐>dev_addr));
if (copy_to_user( argp, &ifr, sizeof ifr))
return ‐EFAULT;
return 0;
case SIOCSIFHWADDR:
{
/* try to set the actual net device's hw address */
int ret = dev_set_mac_address(tun‐>dev, &ifr.ifr_hwaddr);
if (ret == 0) {
/** Set the character device's hardware address. This is used when
* filtering packets being sent from the network device to the character
* device. */
memcpy(tun‐>dev_addr, ifr.ifr_hwaddr.sa_data,
min(sizeof ifr.ifr_hwaddr.sa_data, sizeof tun‐>dev_addr));
DBG(KERN_DEBUG "%s: set hardware address: %x:%x:%x:%x:%x:%x/n",
tun‐>dev‐>name,
tun‐>dev_addr[0], tun‐>dev_addr[1], tun‐>dev_addr[2],
tun‐>dev_addr[3], tun‐>dev_addr[4], tun‐>dev_addr[5]);
}
return ret;
}
case SIOCADDMULTI:
/** Add the specified group to the character device's multicast filter
* list. */
add_multi(tun‐>chr_filter, ifr.ifr_hwaddr.sa_data);
DBG(KERN_DEBUG "%s: add multi: %x:%x:%x:%x:%x:%x/n",
tun‐>dev‐>name,
(u8)ifr.ifr_hwaddr.sa_data[0], (u8)ifr.ifr_hwaddr.sa_data[1],
(u8)ifr.ifr_hwaddr.sa_data[2], (u8)ifr.ifr_hwaddr.sa_data[3],
(u8)ifr.ifr_hwaddr.sa_data[4], (u8)ifr.ifr_hwaddr.sa_data[5]);
return 0;
case SIOCDELMULTI:
/** Remove the specified group from the character device's multicast
* filter list. */
del_multi(tun‐>chr_filter, ifr.ifr_hwaddr.sa_data);
DBG(KERN_DEBUG "%s: del multi: %x:%x:%x:%x:%x:%x/n",
tun‐>dev‐>name,
(u8)ifr.ifr_hwaddr.sa_data[0], (u8)ifr.ifr_hwaddr.sa_data[1],
(u8)ifr.ifr_hwaddr.sa_data[2], (u8)ifr.ifr_hwaddr.sa_data[3],
(u8)ifr.ifr_hwaddr.sa_data[4], (u8)ifr.ifr_hwaddr.sa_data[5]);
return 0;
default:
return ‐EINVAL;
};
return 0;
}
tun_chr_aio_read(异步读)(从tun 设备中读取数据)
/*
 * Read handler of the tun character device: hand one queued packet to
 * user space.
 *
 * Packets are taken from tun->readq. Each packet is run through an
 * Ethernet-style address filter; rejected packets are dropped and the
 * next one is tried. The first accepted packet is copied to the user's
 * iovec with tun_put_user() and the loop ends.
 *
 * If the queue is empty the call returns -EAGAIN for O_NONBLOCK files,
 * -ERESTARTSYS if a signal is pending, and otherwise sleeps on
 * tun->read_wait until woken.
 *
 * Returns the result of tun_put_user(), 0 if the iovec has zero total
 * length, -EBADFD if no interface is attached, or -EINVAL if the iovec
 * length is negative.
 */
static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
			    unsigned long count, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct tun_struct *tun = file->private_data;
	DECLARE_WAITQUEUE(wait, current);
	struct sk_buff *skb;
	ssize_t len, ret = 0;

	if (!tun)
		return -EBADFD;

	DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);

	len = iov_total(iv, count);
	if (len < 0)
		return -EINVAL;

	add_wait_queue(&tun->read_wait, &wait);
	while (len) {
		const u8 ones[ ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
		u8 addr[ ETH_ALEN];
		int bit_nr;

		/* Set the task state before checking the queue, then sleep below. */
		current->state = TASK_INTERRUPTIBLE;

		/* Read frames from the queue */
		if (!(skb=skb_dequeue(&tun->readq))) {
			if (file->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
			if (signal_pending(current)) {
				ret = -ERESTARTSYS;
				break;
			}

			/* Nothing to read, let's sleep */
			schedule();
			continue;
		}
		netif_wake_queue(tun->dev);

		/** Decide whether to accept this packet. This code is designed to
		 * behave identically to an Ethernet interface. Accept the packet if
		 * - we are promiscuous.
		 * - the packet is addressed to us.
		 * - the packet is broadcast.
		 * - the packet is multicast and
		 *   - we are multicast promiscuous.
		 *   - we belong to the multicast group.
		 */
		skb_copy_from_linear_data(skb, addr, min_t(size_t, sizeof addr,
							   skb->len));
		/* The top 6 CRC bits index the 64-bit multicast hash filter. */
		bit_nr = ether_crc(sizeof addr, addr) >> 26;
		if ((tun->if_flags & IFF_PROMISC) ||
				memcmp(addr, tun->dev_addr, sizeof addr) == 0 ||
				memcmp(addr, ones, sizeof addr) == 0 ||
				(((addr[0] == 1 && addr[1] == 0 && addr[2] == 0x5e) ||
				  (addr[0] == 0x33 && addr[1] == 0x33)) &&
				 ((tun->if_flags & IFF_ALLMULTI) ||
				  (tun->chr_filter[bit_nr >> 5] & (1 << (bit_nr & 31)))))) {
			DBG(KERN_DEBUG "%s: tun_chr_readv: accepted: %x:%x:%x:%x:%x:%x\n",
					tun->dev->name, addr[0], addr[1], addr[2],
					addr[3], addr[4], addr[5]);
			ret = tun_put_user(tun, skb, (struct iovec *) iv, len);
			kfree_skb(skb);
			break;
		} else {
			DBG(KERN_DEBUG "%s: tun_chr_readv: rejected: %x:%x:%x:%x:%x:%x\n",
					tun->dev->name, addr[0], addr[1], addr[2],
					addr[3], addr[4], addr[5]);
			kfree_skb(skb);
			continue;
		}
	}

	current->state = TASK_RUNNING;
	remove_wait_queue(&tun->read_wait, &wait);

	return ret;
}
skb_dequeue(src/net/core/skbuff.c)
/**
* skb_dequeue ‐ remove from the head of the queue
* @list: list to dequeue from
*
* Remove the head of the list. The list lock is taken so the function
* may be used safely with other locking list functions. The head item is
* returned or %NULL if the list is empty.
*/
struct sk_buff *skb_dequeue(struct sk_buff_head *list)
{
unsigned long flags;
struct sk_buff *result;
spin_lock_irqsave(&list‐>lock, flags);
result = __skb_dequeue(list);
spin_unlock_irqrestore(&list‐>lock, flags);
return result;
}
__skb_dequeue
/**
* __skb_dequeue ‐ remove from the head of the queue
* @list: list to dequeue from
*
* Remove the head of the list. This function does not take any locks
* so must be used with appropriate locks held only. The head item is
* returned or %NULL if the list is empty.
*/
extern struct sk_buff *skb_dequeue(struct sk_buff_head *list);
static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
{
struct sk_buff *next, *prev, *result;
prev = (struct sk_buff *) list;
next = prev‐>next;
result = NULL;
if (next != prev) {
result = next;
next = next‐>next;
list‐>qlen‐‐;
next‐>prev = prev;
prev‐>next = next;
result‐>next = result‐>prev = NULL;
}
return result;
}
tun_put_user
/* Put packet to the user space buffer */
static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
struct sk_buff *skb,
struct iovec *iv, int len)
{
struct tun_pi pi = { 0, skb‐>protocol };
ssize_t total = 0;
if (!(tun‐>flags & TUN_NO_PI)) {
if ((len ‐= sizeof(pi)) < 0)
return ‐EINVAL;
if (len < skb‐>len) {
/* Packet will be striped */
pi.flags |= TUN_PKT_STRIP;
}
if (memcpy_toiovec(iv, (void *) &pi, sizeof(pi)))
return ‐EFAULT;
total += sizeof(pi);
}
len = min_t(int, skb‐>len, len);
skb_copy_datagram_iovec(skb, 0, iv, len);
total += len;
tun‐>stats.tx_packets++;
tun‐>stats.tx_bytes += len;
return total;
}
tun_chr_aio_write(把数据写入到tun 设备中)
/*
 * Write handler of the tun character device: take one packet from user
 * space and inject it via tun_get_user().
 *
 * Returns the result of tun_get_user(), or -EBADFD if no interface is
 * attached to the file.
 */
static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
			      unsigned long count, loff_t pos)
{
	struct tun_struct *tun = iocb->ki_filp->private_data;

	if (!tun)
		return -EBADFD;

	DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count);

	return tun_get_user(tun, (struct iovec *) iv, iov_total(iv, count));
}
tun_get_user
/*
 * Get packet from user space buffer.
 *
 * @tun:   device the packet is injected through.
 * @iv:    source iovec.
 * @count: total number of bytes in @iv.
 *
 * Unless TUN_NO_PI is set, the data starts with a struct tun_pi header.
 * The payload is copied into a newly allocated skb, which is passed to
 * netif_rx_ni().
 *
 * Returns @count on success, -EINVAL if @count cannot hold the header,
 * -EFAULT if copying from user space fails, or -ENOMEM if no skb could
 * be allocated.
 */
static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count)
{
	struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) };
	struct sk_buff *skb;
	size_t len = count, align = 0;

	if (!(tun->flags & TUN_NO_PI)) {
		/* len is unsigned: a result larger than count means it wrapped. */
		if ((len -= sizeof(pi)) > count)
			return -EINVAL;

		if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
			return -EFAULT;
	}

	/* TAP devices reserve NET_IP_ALIGN bytes of headroom in the skb. */
	if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV)
		align = NET_IP_ALIGN;

	if (!(skb = alloc_skb(len + align, GFP_KERNEL))) {
		tun->stats.rx_dropped++;
		return -ENOMEM;
	}

	if (align)
		skb_reserve(skb, align);
	if (memcpy_fromiovec(skb_put(skb, len), iv, len)) {
		tun->stats.rx_dropped++;
		kfree_skb(skb);
		return -EFAULT;
	}

	switch (tun->flags & TUN_TYPE_MASK) {
	case TUN_TUN_DEV:
		/* TUN: the protocol comes from the tun_pi header. */
		skb_reset_mac_header(skb);
		skb->protocol = pi.proto;
		skb->dev = tun->dev;
		break;
	case TUN_TAP_DEV:
		/* TAP: the protocol comes from eth_type_trans(). */
		skb->protocol = eth_type_trans(skb, tun->dev);
		break;
	};

	if (tun->flags & TUN_NOCHECKSUM)
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	netif_rx_ni(skb);
	tun->dev->last_rx = jiffies;

	tun->stats.rx_packets++;
	tun->stats.rx_bytes += len;

	return count;
}


这篇关于Linux虚拟网卡TUN/TAP的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!



http://www.chinasem.cn/article/682150

相关文章

Linux中shell解析脚本的通配符、元字符、转义符说明

《Linux中shell解析脚本的通配符、元字符、转义符说明》:本文主要介绍shell通配符、元字符、转义符以及shell解析脚本的过程,通配符用于路径扩展,元字符用于多命令分割,转义符用于将特殊... 目录一、linux shell通配符(wildcard)二、shell元字符(特殊字符 Meta)三、s

Linux之软件包管理器yum详解

《Linux之软件包管理器yum详解》文章介绍了现代类Unix操作系统中软件包管理和包存储库的工作原理,以及如何使用包管理器如yum来安装、更新和卸载软件,文章还介绍了如何配置yum源,更新系统软件包... 目录软件包yumyum语法yum常用命令yum源配置文件介绍更新yum源查看已经安装软件的方法总结软

linux报错INFO:task xxxxxx:634 blocked for more than 120 seconds.三种解决方式

《linux报错INFO:taskxxxxxx:634blockedformorethan120seconds.三种解决方式》文章描述了一个Linux最小系统运行时出现的“hung_ta... 目录1.问题描述2.解决办法2.1 缩小文件系统缓存大小2.2 修改系统IO调度策略2.3 取消120秒时间限制3

Linux alias的三种使用场景方式

《Linuxalias的三种使用场景方式》文章介绍了Linux中`alias`命令的三种使用场景:临时别名、用户级别别名和系统级别别名,临时别名仅在当前终端有效,用户级别别名在当前用户下所有终端有效... 目录linux alias三种使用场景一次性适用于当前用户全局生效,所有用户都可调用删除总结Linux

Linux:alias如何设置永久生效

《Linux:alias如何设置永久生效》在Linux中设置别名永久生效的步骤包括:在/root/.bashrc文件中配置别名,保存并退出,然后使用source命令(或点命令)使配置立即生效,这样,别... 目录linux:alias设置永久生效步骤保存退出后功能总结Linux:alias设置永久生效步骤

Mysql虚拟列的使用场景

《Mysql虚拟列的使用场景》MySQL虚拟列是一种在查询时动态生成的特殊列,它不占用存储空间,可以提高查询效率和数据处理便利性,本文给大家介绍Mysql虚拟列的相关知识,感兴趣的朋友一起看看吧... 目录1. 介绍mysql虚拟列1.1 定义和作用1.2 虚拟列与普通列的区别2. MySQL虚拟列的类型2

Linux使用fdisk进行磁盘的相关操作

《Linux使用fdisk进行磁盘的相关操作》fdisk命令是Linux中用于管理磁盘分区的强大文本实用程序,这篇文章主要为大家详细介绍了如何使用fdisk进行磁盘的相关操作,需要的可以了解下... 目录简介基本语法示例用法列出所有分区查看指定磁盘的区分管理指定的磁盘进入交互式模式创建一个新的分区删除一个存

Linux使用dd命令来复制和转换数据的操作方法

《Linux使用dd命令来复制和转换数据的操作方法》Linux中的dd命令是一个功能强大的数据复制和转换实用程序,它以较低级别运行,通常用于创建可启动的USB驱动器、克隆磁盘和生成随机数据等任务,本文... 目录简介功能和能力语法常用选项示例用法基础用法创建可启动www.chinasem.cn的 USB 驱动

高效管理你的Linux系统: Debian操作系统常用命令指南

《高效管理你的Linux系统:Debian操作系统常用命令指南》在Debian操作系统中,了解和掌握常用命令对于提高工作效率和系统管理至关重要,本文将详细介绍Debian的常用命令,帮助读者更好地使... Debian是一个流行的linux发行版,它以其稳定性、强大的软件包管理和丰富的社区资源而闻名。在使用

Linux Mint Xia 22.1重磅发布: 重要更新一览

《LinuxMintXia22.1重磅发布:重要更新一览》Beta版LinuxMint“Xia”22.1发布,新版本基于Ubuntu24.04,内核版本为Linux6.8,这... linux Mint 22.1「Xia」正式发布啦!这次更新带来了诸多优化和改进,进一步巩固了 Mint 在 Linux 桌面