本文主要是介绍socket fs(2),希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
/************************************************************************************/socketfs的创建
sock_init ->{
register_filesystem(&sock_fs_type);
sock_mnt = kern_mount(&sock_fs_type);
}
static struct file_system_type sock_fs_type = {
.name = "sockfs",
.mount = sockfs_mount,
.kill_sb = kill_anon_super,
};
kern_mount -> kern_mount_data -> vfs_kern_mount -> mount_fs ->type->mount();
static struct dentry *sockfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
return mount_pseudo(fs_type, "socket:", &sockfs_ops,
&sockfs_dentry_operations, SOCKFS_MAGIC);
}
static const struct super_operations sockfs_ops = {
.alloc_inode = sock_alloc_inode,
.destroy_inode = sock_destroy_inode,
.statfs = simple_statfs,
};
static const struct dentry_operations sockfs_dentry_operations = {
.d_dname = sockfs_dname,
};
/*
* Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
* will never be mountable)
*/
struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
const struct super_operations *ops,
const struct dentry_operations *dops, unsigned long magic)
{
struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
struct dentry *dentry;
struct inode *root;
struct qstr d_name = {.name = name, .len = strlen(name)};
if (IS_ERR(s))
return ERR_CAST(s);
s->s_flags = MS_NOUSER;
s->s_maxbytes = MAX_LFS_FILESIZE;
s->s_blocksize = PAGE_SIZE;
s->s_blocksize_bits = PAGE_SHIFT;
s->s_magic = magic;
s->s_op = ops ? ops : &simple_super_operations;
s->s_time_gran = 1;
root = new_inode(s);
if (!root)
goto Enomem;
/*
* since this is the first inode, make it number 1. New inodes created
* after this must take care not to collide with it (by passing
* max_reserved of 1 to iunique).
*/
root->i_ino = 1;
root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
dentry = __d_alloc(s, &d_name);
if (!dentry) {
iput(root);
goto Enomem;
}
d_instantiate(dentry, root);
s->s_root = dentry;
s->s_d_op = dops;
s->s_flags |= MS_ACTIVE;
return dget(s->s_root);
Enomem:
deactivate_locked_super(s);
return ERR_PTR(-ENOMEM);
}
/****************************************************************************************/
socket系统调用的实现:
主要有两部分:创建socket/ 关联socket and 文件描述符SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
{
int retval;
struct socket *sock;
int flags;
/* Check the SOCK_* constants for consistency. */
BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
flags = type & ~SOCK_TYPE_MASK;
if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
return -EINVAL;
type &= SOCK_TYPE_MASK;
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
retval = sock_create(family, type, protocol, &sock);
if (retval < 0)
goto out;
retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
if (retval < 0)
goto out_release;
out:
/* It may be already another descriptor 8) Not kernel problem. */
return retval;
out_release:
sock_release(sock);
return retval;
}
创建socket
/*sock_create的实现,这里 name space proxy其中的net name space,就是说这里关联到net*/
int sock_create(int family, int type, int protocol, struct socket **res)
{
return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
}
crash> task_struct | grep nsproxy
struct nsproxy *nsproxy;
crash> nsproxy
struct nsproxy {
atomic_t count;
struct uts_namespace *uts_ns;
struct ipc_namespace *ipc_ns;
struct mnt_namespace *mnt_ns;
struct pid_namespace *pid_ns;
struct net *net_ns;
}
SIZE: 24
crash> struct net
struct net {
atomic_t passive;
atomic_t count;
spinlock_t rules_mod_lock;
struct list_head list;
struct list_head cleanup_list;
struct list_head exit_list;
struct proc_dir_entry *proc_net;
struct proc_dir_entry *proc_net_stat;
struct ctl_table_set sysctls;
struct sock *rtnl;
struct sock *genl_sock;
struct list_head dev_base_head;
struct hlist_head *dev_name_head;
struct hlist_head *dev_index_head;
unsigned int dev_base_seq;
struct list_head rules_ops;
struct net_device *loopback_dev;
struct netns_core core;
struct netns_mib mib;
struct netns_packet packet;
struct netns_unix unx;
struct netns_ipv4 ipv4;
struct netns_ipv6 ipv6;
struct netns_xt xt;
struct netns_ct ct;
struct sock *nfnl;
struct sock *nfnl_stash;
struct sk_buff_head wext_nlevents;
struct net_generic *gen;
struct netns_xfrm xfrm;
struct netns_ipvs *ipvs;
}
SIZE: 1376
/*__sock_create创建了socket,调用对应net family的create 函数*/
crash> socket
struct socket {
socket_state state;
short type;
unsigned long flags;
struct socket_wq *wq;
struct file *file;
struct sock *sk;
const struct proto_ops *ops;
}
SIZE: 28
int __sock_create(struct net *net, int family, int type, int protocol,
struct socket **res, int kern)
{
struct socket *sock;
const struct net_proto_family *pf;
/*
* Allocate the socket and allow the family to set things up. if
* the protocol is 0, the family is instructed to select an appropriate
* default.
*/
sock = sock_alloc();
sock->type = type;
pf = rcu_dereference(net_families[family]);
err = pf->create(net, sock, protocol, kern);
*res = sock;
return 0;
}
crash> net_proto_family
struct net_proto_family {
int family;
int (*create)(struct net *, struct socket *, int, int);
struct module *owner;
}
crash> net_families
net_families = $1 =
{0x0,
0xc0561d3c <unix_family_ops>,
0xc0560954 <inet_family_ops>,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0xc0561eb0 <inet6_family_ops>,
0x0, 0x0, 0x0, 0x0,
0xc0563458 <pfkey_family_ops>,
0xc055f384 <netlink_family_ops>,
0xc05632e0 <packet_family_ops>,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0xc0543358 <pppox_proto_family>,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0xc0703c8c <bt_sock_family_ops>,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
}
关联socket and file
int sock_map_fd(struct socket *sock, int flags){
struct file *newfile;
int fd = sock_alloc_file(sock, &newfile, flags);
if (likely(fd >= 0))
fd_install(fd, newfile);
return fd;
}
/*
* Obtains the first available file descriptor and sets it up for use.
*
* These functions create file structures and maps them to fd space
* of the current process. On success it returns file descriptor
* and file struct implicitly stored in sock->file.
* Note that another thread may close file descriptor before we return
* from this function. We use the fact that now we do not refer
* to socket after mapping. If one day we will need it, this
* function will increment ref. count on file by 1.
*
* In any case returned fd MAY BE not valid!
* This race condition is unavoidable
* with shared fd spaces, we cannot solve it inside kernel,
* but we take care of internal coherence yet.
*/
static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
{
struct qstr name = { .name = "" };
struct path path;
struct file *file;
int fd;
fd = get_unused_fd_flags(flags);
path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
path.mnt = mntget(sock_mnt);
d_instantiate(path.dentry, SOCK_INODE(sock));
/ *inode的 fops赋值为socket_file_ops*/
SOCK_INODE(sock)->i_fop = &socket_file_ops;/*inode fops*/
file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
&socket_file_ops);
sock->file = file;
file->f_flags = O_RDWR | (flags & O_NONBLOCK);
file->f_pos = 0;/*is NULL*/
file->private_data = sock;
*f = file;
return fd;
}
/*
* Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
* in the operation structures but are done directly via the socketcall() multiplexor.
*/
static const struct file_operations socket_file_ops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.aio_read = sock_aio_read,
.aio_write = sock_aio_write,
.poll = sock_poll,
.unlocked_ioctl = sock_ioctl,
.mmap = sock_mmap,
.open = sock_no_open, /* special open code to disallow open via /proc */
.release = sock_close,
.fasync = sock_fasync,
.sendpage = sock_sendpage,
.splice_write = generic_splice_sendpage,
.splice_read = sock_splice_read,
};
/*
* Install a file pointer in the fd array.
*/
void fd_install(unsigned int fd, struct file *file)
{
struct files_struct *files = current->files;
struct fdtable *fdt;
spin_lock(&files->file_lock);
fdt = files_fdtable(files);
BUG_ON(fdt->fd[fd] != NULL);
rcu_assign_pointer(fdt->fd[fd], file);
spin_unlock(&files->file_lock);
}
/************************************************************************************/
以#define AF_NETLINK 16 为例看socket的创建过程:
int __sock_create(struct net *net, int family, int type, int protocol,struct socket **res, int kern)
{
struct socket *sock;
const struct net_proto_family *pf;
/*
* Allocate the socket and allow the family to set things up. if
* the protocol is 0, the family is instructed to select an appropriate
* default.
*/
sock = sock_alloc();
sock->type = type;
pf = rcu_dereference(net_families[family]);
err = pf->create(net, sock, protocol, kern);
*res = sock;
return 0;
}
以0xc055f384 <netlink_family_ops>,
#define AF_NETLINK 16
crash> netlink_family_ops
netlink_family_ops = $11 = {
family = 16,
create = 0xc03f29b8 <netlink_create>,
owner = 0x0
}
static int netlink_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
struct module *module = NULL;
struct mutex *cb_mutex;
struct netlink_sock *nlk;
int err = 0;
sock->state = SS_UNCONNECTED;
err = __netlink_create(net, sock, cb_mutex, protocol);
local_bh_disable();
sock_prot_inuse_add(net, &netlink_proto, 1);
local_bh_enable();
nlk = nlk_sk(sock->sk);
nlk->module = module;
return err;
}
static int __netlink_create(struct net *net, struct socket *sock,
struct mutex *cb_mutex, int protocol)
{
struct sock *sk;
struct netlink_sock *nlk;
sock->ops = &netlink_ops;
sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
if (!sk)
return -ENOMEM;
sock_init_data(sock, sk);
nlk = nlk_sk(sk);
if (cb_mutex)
nlk->cb_mutex = cb_mutex;
else {
nlk->cb_mutex = &nlk->cb_def_mutex;
mutex_init(nlk->cb_mutex);
}
init_waitqueue_head(&nlk->wait);
sk->sk_destruct = netlink_sock_destruct;
sk->sk_protocol = protocol;
return 0;
}
static const struct proto_ops netlink_ops = {
.family = PF_NETLINK,
.owner = THIS_MODULE,
.release = netlink_release,
.bind = netlink_bind,
.connect = netlink_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = netlink_getname,
.poll = datagram_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = netlink_setsockopt,
.getsockopt = netlink_getsockopt,
.sendmsg = netlink_sendmsg,
.recvmsg = netlink_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};
socket read system call
上图对应的代码流程
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
struct file *file;
ssize_t ret = -EBADF;
int fput_needed;
file = fget_light(fd, &fput_needed);/*从fd得到file object*/
if (file) {
loff_t pos = file_pos_read(file);/*从哪里开始read*/
ret = vfs_read(file, buf, count, &pos);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
return ret;
}
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
ssize_t ret;
if (!(file->f_mode & FMODE_READ))
return -EBADF;
if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
return -EINVAL;
if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
return -EFAULT;
ret = rw_verify_area(READ, file, pos, count);
if (ret >= 0) {
count = ret;
if (file->f_op->read)
ret = file->f_op->read(file, buf, count, pos);
else
ret = do_sync_read(file, buf, count, pos);
if (ret > 0) {
fsnotify_access(file);
add_rchar(current, ret);
}
inc_syscr(current);
}
return ret;
}
ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
struct iovec iov = { .iov_base = buf, .iov_len = len };
struct kiocb kiocb;
ssize_t ret;
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
kiocb.ki_left = len;
kiocb.ki_nbytes = len;
for (;;) {
ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
if (ret != -EIOCBRETRY)
break;
wait_on_retry_sync_kiocb(&kiocb);
}
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
*ppos = kiocb.ki_pos;
return ret;
}
/*sockfs*/
.aio_read = sock_aio_read,
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct sock_iocb siocb, *x;
if (pos != 0)
return -ESPIPE;
if (iocb->ki_left == 0) /* Match SYS5 behaviour */
return 0;
x = alloc_sock_iocb(iocb, &siocb);
if (!x)
return -ENOMEM;
return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
}
static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
struct file *file, const struct iovec *iov,
unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
int i;
for (i = 0; i < nr_segs; i++)
size += iov[i].iov_len;
msg->msg_name = NULL;
msg->msg_namelen = 0;
msg->msg_control = NULL;
msg->msg_controllen = 0;
msg->msg_iov = (struct iovec *)iov;
msg->msg_iovlen = nr_segs;
msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
}
static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags)
{
int err = security_socket_recvmsg(sock, msg, size, flags);
return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
}
static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags)
{
struct sock_iocb *si = kiocb_to_siocb(iocb);
sock_update_classid(sock->sk);
si->sock = sock;
si->scm = NULL;
si->msg = msg;
si->size = size;
si->flags = flags;
return sock->ops->recvmsg(iocb, sock, msg, size, flags);
}
这篇关于socket fs(2)的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!