socket fs(2)

2024-06-03 16:08
文章标签 fs socket

本文主要是介绍socket fs(2),希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!

/************************************************************************************/

socketfs的创建

sock_init ->
{
    register_filesystem(&sock_fs_type);
    sock_mnt = kern_mount(&sock_fs_type);
}
/*
 * Filesystem type descriptor for sockfs. It is named "sockfs", is mounted
 * through sockfs_mount() and uses kill_anon_super() as its kill_sb hook.
 */
static struct file_system_type sock_fs_type = {
    .name =        "sockfs",
    .mount =    sockfs_mount,
    .kill_sb =    kill_anon_super,
};

kern_mount -> kern_mount_data -> vfs_kern_mount -> mount_fs ->type->mount();
/*
 * Mount callback of sock_fs_type. It hands the work to mount_pseudo(),
 * passing "socket:" as the root name together with the sockfs super and
 * dentry operation tables and SOCKFS_MAGIC. The flags, dev_name and data
 * arguments are not used by this function.
 */
static struct dentry *sockfs_mount(struct file_system_type *fs_type,
             int flags, const char *dev_name, void *data)
{
    return mount_pseudo(fs_type, "socket:", &sockfs_ops,
        &sockfs_dentry_operations, SOCKFS_MAGIC);
}

/*
 * Super operations of sockfs; sockfs_mount() passes this table to
 * mount_pseudo(). Inode allocation and destruction use the socket-specific
 * helpers, statfs uses the generic simple_statfs().
 */
static const struct super_operations sockfs_ops = {
    .alloc_inode    = sock_alloc_inode,
    .destroy_inode    = sock_destroy_inode,
    .statfs        = simple_statfs,
};

/*
 * Dentry operations of sockfs; sockfs_mount() passes this table to
 * mount_pseudo(). Only the d_dname hook is provided.
 */
static const struct dentry_operations sockfs_dentry_operations = {
    .d_dname  = sockfs_dname,
};


/*
 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
 * will never be mountable)
 *
 * Obtains a superblock through sget(), fills in its basic fields, creates
 * a root inode (inode number 1) plus a root dentry named @name, and returns
 * a referenced pointer to that root dentry.
 *
 * @fs_type: filesystem type handed to sget()
 * @name:    name given to the root dentry
 * @ops:     super operations; &simple_super_operations is used when NULL
 * @dops:    dentry operations stored in s->s_d_op
 * @magic:   value stored in s->s_magic
 *
 * Returns dget(s->s_root) on success, ERR_CAST() of the sget() result when
 * sget() failed, or ERR_PTR(-ENOMEM) when the root inode or the root dentry
 * could not be allocated.
 */
struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
    const struct super_operations *ops,
    const struct dentry_operations *dops, unsigned long magic)
{
    struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
    struct dentry *dentry;
    struct inode *root;
    struct qstr d_name = {.name = name, .len = strlen(name)};

    /* sget() reports failure through an error pointer; pass it on. */
    if (IS_ERR(s))
        return ERR_CAST(s);

    /* Basic superblock parameters. */
    s->s_flags = MS_NOUSER;
    s->s_maxbytes = MAX_LFS_FILESIZE;
    s->s_blocksize = PAGE_SIZE;
    s->s_blocksize_bits = PAGE_SHIFT;
    s->s_magic = magic;
    s->s_op = ops ? ops : &simple_super_operations;
    s->s_time_gran = 1;
    root = new_inode(s);
    if (!root)
        goto Enomem;
    /*
     * since this is the first inode, make it number 1. New inodes created
     * after this must take care not to collide with it (by passing
     * max_reserved of 1 to iunique).
     */
    root->i_ino = 1;
    root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
    root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
    dentry = __d_alloc(s, &d_name);
    if (!dentry) {
        /* The root inode is not attached to anything yet; drop it here. */
        iput(root);
        goto Enomem;
    }
    /* Bind the root inode to the root dentry and publish it on the sb. */
    d_instantiate(dentry, root);
    s->s_root = dentry;
    s->s_d_op = dops;
    s->s_flags |= MS_ACTIVE;
    return dget(s->s_root);

Enomem:
    /* Allocation failed after sget() succeeded: give the superblock back. */
    deactivate_locked_super(s);
    return ERR_PTR(-ENOMEM);
}

/****************************************************************************************/

socket系统调用的实现:

主要有两部分:创建 socket;将 socket 与文件描述符关联。
/*
 * socket(2) system call: create a socket and bind it to a new file
 * descriptor.
 *
 * The upper bits of @type may carry SOCK_CLOEXEC and/or SOCK_NONBLOCK; any
 * other extra bit yields -EINVAL. On success the new descriptor is
 * returned; otherwise the negative error from sock_create() or
 * sock_map_fd() is returned, and in the latter case the freshly created
 * socket is released again.
 */
SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
    struct socket *sock;
    int flags;
    int ret;

    /* Check the SOCK_* constants for consistency.  */
    BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
    BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
    BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
    BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);

    /* Separate the flag bits from the plain socket type. */
    flags = type & ~SOCK_TYPE_MASK;
    type &= SOCK_TYPE_MASK;
    if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
        return -EINVAL;

    /* Translate SOCK_NONBLOCK into O_NONBLOCK where the two values differ. */
    if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) {
        flags &= ~SOCK_NONBLOCK;
        flags |= O_NONBLOCK;
    }

    ret = sock_create(family, type, protocol, &sock);
    if (ret < 0)
        return ret;

    ret = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
    if (ret < 0)
        sock_release(sock);

    /* It may be already another descriptor 8) Not kernel problem. */
    return ret;
}


创建socket

/*
 * sock_create: create a socket in the network namespace of the calling
 * task. The namespace is read from current->nsproxy->net_ns and the request
 * is forwarded to __sock_create() with kern set to 0.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
    struct net *net = current->nsproxy->net_ns;

    return __sock_create(net, family, type, protocol, res, 0);
}

crash> task_struct | grep nsproxy
    struct nsproxy *nsproxy;
crash> nsproxy
struct nsproxy {
    atomic_t count;
    struct uts_namespace *uts_ns;
    struct ipc_namespace *ipc_ns;
    struct mnt_namespace *mnt_ns;
    struct pid_namespace *pid_ns;
    struct net *net_ns;
}
SIZE: 24

crash> struct net
struct net {
    atomic_t passive;
    atomic_t count;
    spinlock_t rules_mod_lock;
    struct list_head list;
    struct list_head cleanup_list;
    struct list_head exit_list;
    struct proc_dir_entry *proc_net;
    struct proc_dir_entry *proc_net_stat;
    struct ctl_table_set sysctls;
    struct sock *rtnl;
    struct sock *genl_sock;
    struct list_head dev_base_head;
    struct hlist_head *dev_name_head;
    struct hlist_head *dev_index_head;
    unsigned int dev_base_seq;
    struct list_head rules_ops;
    struct net_device *loopback_dev;
    struct netns_core core;
    struct netns_mib mib;
    struct netns_packet packet;
    struct netns_unix unx;
    struct netns_ipv4 ipv4;
    struct netns_ipv6 ipv6;
    struct netns_xt xt;
    struct netns_ct ct;
    struct sock *nfnl;
    struct sock *nfnl_stash;
    struct sk_buff_head wext_nlevents;
    struct net_generic *gen;
    struct netns_xfrm xfrm;
    struct netns_ipvs *ipvs;
}
SIZE: 1376

/*__sock_create创建了socket,调用对应net family的create 函数*/
crash> socket
struct socket {
    socket_state state;
    short type;
    unsigned long flags;
    struct socket_wq *wq;
    struct file *file;
    struct sock *sk;
    const struct proto_ops *ops;
}
SIZE: 28

int __sock_create(struct net *net, int family, int type, int protocol,
             struct socket **res, int kern)
{
    struct socket *sock;
    const struct net_proto_family *pf;

    /*
     *    Allocate the socket and allow the family to set things up. if
     *    the protocol is 0, the family is instructed to select an appropriate
     *    default.
     */
    sock = sock_alloc();
    sock->type = type;
    pf = rcu_dereference(net_families[family]);
    err = pf->create(net, sock, protocol, kern);
    *res = sock;

    return 0;
}

crash> net_proto_family
struct net_proto_family {
    int family;
    int (*create)(struct net *, struct socket *, int, int);
    struct module *owner;
}

crash> net_families
net_families = $1 =
{0x0,
0xc0561d3c <unix_family_ops>,
0xc0560954 <inet_family_ops>,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0xc0561eb0 <inet6_family_ops>,
0x0, 0x0, 0x0, 0x0,
0xc0563458 <pfkey_family_ops>,
0xc055f384 <netlink_family_ops>,
0xc05632e0 <packet_family_ops>,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0xc0543358 <pppox_proto_family>,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0xc0703c8c <bt_sock_family_ops>,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
}


关联 socket 与 file

/*
 * Attach @sock to a new file descriptor of the current process.
 *
 * sock_alloc_file() reserves the descriptor and builds the struct file; the
 * file is installed into the fd table only when that succeeded. Returns the
 * descriptor, or the negative error from sock_alloc_file().
 */
int sock_map_fd(struct socket *sock, int flags)
{
    struct file *filp;
    int fd;

    fd = sock_alloc_file(sock, &filp, flags);
    if (unlikely(fd < 0))
        return fd;

    fd_install(fd, filp);
    return fd;
}


/*
 *    Obtains the first available file descriptor and sets it up for use.
 *
 *    These functions create file structures and maps them to fd space
 *    of the current process. On success it returns file descriptor
 *    and file struct implicitly stored in sock->file.
 *    Note that another thread may close file descriptor before we return
 *    from this function. We use the fact that now we do not refer
 *    to socket after mapping. If one day we will need it, this
 *    function will increment ref. count on file by 1.
 *
 *    In any case returned fd MAY BE not valid!
 *    This race condition is unavoidable
 *    with shared fd spaces, we cannot solve it inside kernel,
 *    but we take care of internal coherence yet.
 */

static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
{
    struct qstr name = { .name = "" };
    struct path path;
    struct file *file;
    int fd;

    fd = get_unused_fd_flags(flags);

    path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);

    path.mnt = mntget(sock_mnt);

    d_instantiate(path.dentry, SOCK_INODE(sock));
    / *inode的 fops赋值为socket_file_ops*/
    SOCK_INODE(sock)->i_fop = &socket_file_ops;/*inode fops*/

    file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
          &socket_file_ops);


    sock->file = file;
    file->f_flags = O_RDWR | (flags & O_NONBLOCK);
    file->f_pos = 0;/*is NULL*/
    file->private_data = sock;

    *f = file;
    return fd;
}

/*
 *    Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
 *    in the operation structures but are done directly via the socketcall() multiplexor.
 *
 *    sock_alloc_file() installs this table both as the socket inode's i_fop
 *    and as the operations of the struct file it creates. No .read/.write
 *    hooks are set, only .aio_read/.aio_write.
 */

static const struct file_operations socket_file_ops = {
    .owner =    THIS_MODULE,
    .llseek =    no_llseek,
    .aio_read =    sock_aio_read,
    .aio_write =    sock_aio_write,
    .poll =        sock_poll,
    .unlocked_ioctl = sock_ioctl,

    .mmap =        sock_mmap,
    .open =        sock_no_open,    /* special open code to disallow open via /proc */
    .release =    sock_close,
    .fasync =    sock_fasync,
    .sendpage =    sock_sendpage,
    .splice_write = generic_splice_sendpage,
    .splice_read =    sock_splice_read,
};

/*
 * Install a file pointer in the fd array.
 *
 * Stores @file in slot @fd of the current task's fd table while holding
 * files->file_lock. The slot is expected to be empty: a non-NULL entry
 * trips BUG_ON().
 */
void fd_install(unsigned int fd, struct file *file)
{
    struct files_struct *files = current->files;
    struct fdtable *fdt;
    spin_lock(&files->file_lock);
    /* Fetch the fd table under the lock before touching the slot. */
    fdt = files_fdtable(files);
    BUG_ON(fdt->fd[fd] != NULL);
    /* Publish the pointer with rcu_assign_pointer(). */
    rcu_assign_pointer(fdt->fd[fd], file);
    spin_unlock(&files->file_lock);
}

/************************************************************************************/

以#define AF_NETLINK    16 为例看socket的创建过程:

int __sock_create(struct net *net, int family, int type, int protocol,
             struct socket **res, int kern)
{
    struct socket *sock;
    const struct net_proto_family *pf;

    /*
     *    Allocate the socket and allow the family to set things up. if
     *    the protocol is 0, the family is instructed to select an appropriate
     *    default.
     */
    sock = sock_alloc();
    sock->type = type;
    pf = rcu_dereference(net_families[family]);
    err = pf->create(net, sock, protocol, kern);
    *res = sock;

    return 0;
}

以 net_families[16] = 0xc055f384 <netlink_family_ops> 为例:
#define AF_NETLINK    16

crash> netlink_family_ops
netlink_family_ops = $11 = {
  family = 16,
  create = 0xc03f29b8 <netlink_create>,
  owner = 0x0
}


static int netlink_create(struct net *net, struct socket *sock, int protocol,
              int kern)
{
    struct module *module = NULL;
    struct mutex *cb_mutex;
    struct netlink_sock *nlk;
    int err = 0;

    sock->state = SS_UNCONNECTED;


    err = __netlink_create(net, sock, cb_mutex, protocol);

    local_bh_disable();
    sock_prot_inuse_add(net, &netlink_proto, 1);
    local_bh_enable();

    nlk = nlk_sk(sock->sk);
    nlk->module = module;

    return err;

}

/*
 * Allocate and initialise the struct sock behind a netlink socket.
 *
 * @net:      network namespace passed to sk_alloc()
 * @sock:     socket to set up; its ops are set to &netlink_ops
 * @cb_mutex: callback mutex to use, or NULL to use the per-socket
 *            cb_def_mutex
 * @protocol: stored in sk->sk_protocol
 *
 * Returns 0 on success or -ENOMEM when sk_alloc() fails. Note that
 * sock->ops has already been assigned when -ENOMEM is returned.
 */
static int __netlink_create(struct net *net, struct socket *sock,
                struct mutex *cb_mutex, int protocol)
{
    struct sock *sk;
    struct netlink_sock *nlk;

    sock->ops = &netlink_ops;

    sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
    if (!sk)
        return -ENOMEM;

    sock_init_data(sock, sk);

    nlk = nlk_sk(sk);
    /* Use the caller's mutex when given, else the socket's own default. */
    if (cb_mutex)
        nlk->cb_mutex = cb_mutex;
    else {
        nlk->cb_mutex = &nlk->cb_def_mutex;
        mutex_init(nlk->cb_mutex);
    }
    init_waitqueue_head(&nlk->wait);

    sk->sk_destruct = netlink_sock_destruct;
    sk->sk_protocol = protocol;
    return 0;
}

/*
 * proto_ops table for PF_NETLINK sockets; __netlink_create() installs it
 * as sock->ops. Operations that netlink does not provide are routed to the
 * generic sock_no_*() helpers.
 */
static const struct proto_ops netlink_ops = {
    .family =    PF_NETLINK,
    .owner =    THIS_MODULE,
    .release =    netlink_release,
    .bind =        netlink_bind,
    .connect =    netlink_connect,
    .socketpair =    sock_no_socketpair,
    .accept =    sock_no_accept,
    .getname =    netlink_getname,
    .poll =        datagram_poll,
    .ioctl =    sock_no_ioctl,
    .listen =    sock_no_listen,
    .shutdown =    sock_no_shutdown,
    .setsockopt =    netlink_setsockopt,
    .getsockopt =    netlink_getsockopt,
    .sendmsg =    netlink_sendmsg,
    .recvmsg =    netlink_recvmsg,
    .mmap =        sock_no_mmap,
    .sendpage =    sock_no_sendpage,

};

socket read system call


上图对应的代码流程

/*
 * read(2) system call entry point.
 *
 * Resolves @fd to a struct file, reads through vfs_read() starting at the
 * file's current position and stores the updated position back. Returns
 * the vfs_read() result, or -EBADF when @fd does not resolve to a file.
 */
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
    struct file *file;
    ssize_t ret = -EBADF;
    int fput_needed;

    file = fget_light(fd, &fput_needed);/* look up the file object for fd */
    if (file) {
        loff_t pos = file_pos_read(file);/* offset at which the read starts */
        ret = vfs_read(file, buf, count, &pos);
        file_pos_write(file, pos);
        fput_light(file, fput_needed);
    }

    return ret;
}



/*
 * Generic read path shared by all file types.
 *
 * Returns -EBADF when the file was not opened for reading, -EINVAL when it
 * has neither a ->read nor an ->aio_read operation, -EFAULT when the user
 * buffer fails access_ok(), a negative value from rw_verify_area(), or
 * otherwise the result of the file's read operation.
 */
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
    ssize_t ret;

    if (!(file->f_mode & FMODE_READ))
        return -EBADF;
    if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
        return -EINVAL;
    /* A read stores into the user buffer, hence VERIFY_WRITE. */
    if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
        return -EFAULT;

    ret = rw_verify_area(READ, file, pos, count);
    if (ret >= 0) {
        /* Continue with the count that rw_verify_area() returned. */
        count = ret;
        /* Prefer ->read; otherwise go through ->aio_read synchronously. */
        if (file->f_op->read)
            ret = file->f_op->read(file, buf, count, pos);
        else
            ret = do_sync_read(file, buf, count, pos);
        if (ret > 0) {
            fsnotify_access(file);
            add_rchar(current, ret);
        }
        inc_syscr(current);
    }

    return ret;
}


/*
 * Perform a synchronous read on top of the file's ->aio_read operation.
 *
 * Wraps the user buffer in a single iovec and a synchronous kiocb, calls
 * ->aio_read until it stops returning -EIOCBRETRY, waits for completion
 * when the request was queued (-EIOCBQUEUED), and writes the resulting
 * position back through @ppos. Returns the final result of the read.
 */
ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
    struct iovec iov = { .iov_base = buf, .iov_len = len };
    struct kiocb kiocb;
    ssize_t ret;

    init_sync_kiocb(&kiocb, filp);
    kiocb.ki_pos = *ppos;
    kiocb.ki_left = len;
    kiocb.ki_nbytes = len;

    /* Re-issue the request for as long as the operation asks for a retry. */
    for (;;) {
        ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
        if (ret != -EIOCBRETRY)
            break;
        wait_on_retry_sync_kiocb(&kiocb);
    }

    /* Queued request: block until it completes and take its result. */
    if (-EIOCBQUEUED == ret)
        ret = wait_on_sync_kiocb(&kiocb);
    *ppos = kiocb.ki_pos;
    return ret;
}

/*sockfs*/
.aio_read =    sock_aio_read,

/*
 * ->aio_read operation of socket files (see socket_file_ops).
 *
 * Returns -ESPIPE for a non-zero position, 0 when iocb->ki_left is 0,
 * -ENOMEM when alloc_sock_iocb() returns NULL, and otherwise the result of
 * do_sock_read().
 */
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
                unsigned long nr_segs, loff_t pos)
{
    struct sock_iocb siocb, *x;

    /* Only position 0 is accepted for socket reads. */
    if (pos != 0)
        return -ESPIPE;

    if (iocb->ki_left == 0)    /* Match SYS5 behaviour */
        return 0;


    x = alloc_sock_iocb(iocb, &siocb);
    if (!x)
        return -ENOMEM;
    return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
}

static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
        struct file *file, const struct iovec *iov,
        unsigned long nr_segs)
{
    struct socket *sock = file->private_data;
    size_t size = 0;
    int i;

    for (i = 0; i < nr_segs; i++)
        size += iov[i].iov_len;

    msg->msg_name = NULL;
    msg->msg_namelen = 0;
    msg->msg_control = NULL;
    msg->msg_controllen = 0;
    msg->msg_iov = (struct iovec *)iov;
    msg->msg_iovlen = nr_segs;
    msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;

    return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
}

/*
 * Receive with a security check: security_socket_recvmsg() runs first and
 * any non-zero result is returned as is; only a zero result lets the
 * request through to __sock_recvmsg_nosec().
 */
static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
                 struct msghdr *msg, size_t size, int flags)
{
    int rc = security_socket_recvmsg(sock, msg, size, flags);

    if (rc)
        return rc;

    return __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
}

/*
 * Receive without the security check: record the request parameters in the
 * sock_iocb attached to @iocb and dispatch to the recvmsg operation of the
 * socket's proto_ops (sock->ops->recvmsg). Returns that operation's result.
 */
static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
                       struct msghdr *msg, size_t size, int flags)
{
    struct sock_iocb *si = kiocb_to_siocb(iocb);

    sock_update_classid(sock->sk);

    /* Remember the request in the per-iocb socket state. */
    si->sock = sock;
    si->scm = NULL;
    si->msg = msg;
    si->size = size;
    si->flags = flags;

    return sock->ops->recvmsg(iocb, sock, msg, size, flags);
}


这篇关于socket fs(2)的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!



http://www.chinasem.cn/article/1027459

相关文章

Java Socket服务器端与客户端的编程步骤总结

一,InetAddress类: InetAddress类没有构造方法,所以不能直接new出一个对象; 可以通过InetAddress类的静态方法获得InetAddress的对象; InetAddress.getLocalHost(); InetAddress.getByName(""); 类主要方法: String - address.getHostName(); String - addre

VC环境下window网络程序:UDP Socket程序

最近在学Windows网络编程,正好在做UDPsocket的程序,贴上来: 服务器框架函数:              socket();    bind();    recvfrom();  sendto();  closesocket(); 客户机框架函数:            socket();      recvfrom();  sendto();  closesocket();

socket()接口与内核协议栈的挂接

最近在看Bridge的代码,发现一个问题,同样的调用ioctl接口实现添加网桥、删除网桥、网桥增加网卡、网桥删除网卡等操作,一个应用层的接口,却通过两条路径实现,sock_ioctl和RTNETLINK(这本就不是一个级别的东西),而应用层的brctl-utils源码中并没有直接使用PF_NETLINK协议簇的情况,让我感到非常奇怪,因此想把glibc到系统调用,到协议簇注册,以及和VFS的关系再

linux下的Socket网络编程教程

套接字概念 Socket本身有“插座”的意思,在Linux环境下,用于表示进程间网络通信的特殊文件类型。本质为内核借助缓冲区形成的伪文件。与管道类似的,Linux系统将其封装成文件的目的是为了统一接口,使得读写套接字和读写文件的操作一致。区别是管道主要应用于本地进程间通信,而套接字多应用于网络进程间数据的传递。在TCP/IP协议中,“IP地址+TCP或UDP端口号”唯一标识网络通讯中的一个进程。

socket函数接收发送详解

http://blog.csdn.net/g_brightboy/article/details/12854117 http://blog.csdn.net/liangkaiyang/article/details/5931901 send。。。 这里只描述同步Socket的send函数的执行流程。 当调用该函数时,send先比较待发送数据的长度

linux下socket常用函数

1、setprotoent(打开网络协议的数据文件) 相关函数  getprotobyname, getprotobynumber, endprotoent 表头文件  #include <netdb.h> 定义函数  void setprotoent (int stayopen); 函数说明      setprotoent()用来打开/etc/protocols,如果参数

udp网络通信 socket

套接字是实现进程间通信的编程。IP可以标定主机在全网的唯一性,端口可以标定进程在主机的唯一性,那么socket通过IP+端口号就可以让两个在全网唯一标定的进程进行通信。 套接字有三种: 域间套接字:实现主机内部的进程通信的编程 原始套接字:使用网络层或者数据链路层的接口进行编程,更难更底层,例如制作抓包等网络工具 网络套接字:实现用户通信的编程 udp网络通信 服务端server 分

c# Socket编程基础知识

这一篇文章,将图文并茂地介绍Socket编程的基础知识,我相信,如果你按照步骤做完实验,一定可以对Socket编程有更好地理解。 本文源代码,可以通过这里下载 http://files.cnblogs.com/chenxizhang/SocketWorkshop.rar   第一步:创建解决方案 第二步:创建服务端程序 这里可以选择“Console Application”这个类型,

C++与AS3中socket字节顺序

C++在常见的x86平台上使用小端字节顺序,AS3中默认字节顺序是大端(见socket类中endian属性) 所以AS3与C++做socket通信时,无论发送还是接收C++消息时,需要修改字节顺序,例如 var bytes:ByteArray=new ByteArray; bytes.endian=Endian.LITTLE_ENDIAN;

Android中Socket通信之TCP与UDP传输原理

一、Socket通信简介  Android与服务器的通信方式主要有两种,一是Http通信,一是Socket通信。两者的最大差异在于,http连接使用的是“请求—响应方式”,即在请求时建立连接通道,当客户端向服务器发送请求后,服务器端才能向客户端返回数据。 而Socket通信中基于TCP/IP协议的通信则是在双方建立起连接后就可以直接进行数据的传输,在连接时可实现信息的主动推送,而不需要每