EXT4文件系统学习（14）VFS之VFS inode

本文主要是介绍EXT4文件系统学习（14）VFS之VFS inode，希望对大家解决编程问题提供一定的参考价值，需要的开发者们随着小编来一起学习吧！

不同的文件系统inode也不一样，对inode的操作函数也不一样，VFS inode的作用就是隐藏下面具体文件系统的inode差异，向上层提供统一的接口。分享Linux爱好者的一篇文章：深度剖析 Linux cp 命令的秘密，里面介绍了稀疏文件。

inode

struct inode {umode_t			i_mode;unsigned short		i_opflags;kuid_t			i_uid;kgid_t			i_gid;unsigned int		i_flags;#ifdef CONFIG_FS_POSIX_ACLstruct posix_acl	*i_acl;struct posix_acl	*i_default_acl;
#endifconst struct inode_operations	*i_op; 指定一组对inode的操作函数struct super_block	*i_sb;struct address_space	*i_mapping;#ifdef CONFIG_SECURITYvoid			*i_security;
#endif/* Stat data, not accessed from path walking */unsigned long		i_ino;/** Filesystems may only read i_nlink directly.  They shall use the* following functions for modification:**    (set|clear|inc|drop)_nlink 硬链接数量*    inode_(inc|dec)_link_count*/union {const unsigned int i_nlink;unsigned int __i_nlink;};dev_t			i_rdev;loff_t			i_size;struct timespec		i_atime;struct timespec		i_mtime;struct timespec		i_ctime;spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */unsigned short          i_bytes;unsigned int		i_blkbits;blkcnt_t		i_blocks;#ifdef __NEED_I_SIZE_ORDEREDseqcount_t		i_size_seqcount;
#endif/* Misc */unsigned long		i_state;struct mutex		i_mutex;unsigned long		dirtied_when;	/* jiffies of first dirtying */unsigned long		dirtied_time_when;struct hlist_node	i_hash;struct list_head	i_wb_list;	/* backing dev IO list */struct list_head	i_lru;		/* inode LRU list */struct list_head	i_sb_list;union {struct hlist_head	i_dentry;struct rcu_head		i_rcu;};u64			i_version;atomic_t		i_count; 引用计数atomic_t		i_dio_count;atomic_t		i_writecount;
#ifdef CONFIG_IMAatomic_t		i_readcount; /* struct files open RO */
#endifconst struct file_operations	*i_fop;	指定对文件内容本身的操作函数struct file_lock_context	*i_flctx;struct address_space	i_data;struct list_head	i_devices;union {struct pipe_inode_info	*i_pipe;特殊文件系统，如字符设备struct block_device	*i_bdev;struct cdev		*i_cdev;};__u32			i_generation;#ifdef CONFIG_FSNOTIFY__u32			i_fsnotify_mask; /* all events this inode cares about */struct hlist_head	i_fsnotify_marks;
#endifvoid			*i_private; /* fs or device private pointer */
};

结构体大部分成员都是根据磁盘的inode初始化的，详细见上一章。

根据inode编号获取inode结构操作是很繁琐的，因此内核使用hash表让每一个inode通过i_hash链接到hash表。

VFS inode的i_op和i_fop指针操作具体文件系统的inode，但是目录文件、链接文件等是分开的，具体可分为4种情况：

普通文件和目录的inode_operations

普通文件

const struct inode_operations ext4_file_inode_operations = {.setattr	= ext4_setattr,.getattr	= ext4_getattr,.setxattr	= generic_setxattr,.getxattr	= generic_getxattr,.listxattr	= ext4_listxattr,.removexattr	= generic_removexattr,.get_acl	= ext4_get_acl,.set_acl	= ext4_set_acl,.fiemap		= ext4_fiemap,
};

const struct inode_operations ext4_dir_inode_operations = {.create		= ext4_create, 创建文件.lookup		= ext4_lookup,.link		= ext4_link, 硬链接.unlink		= ext4_unlink,.symlink	= ext4_symlink, 软链接.mkdir		= ext4_mkdir, 创建目录.rmdir		= ext4_rmdir,.mknod		= ext4_mknod, 创建设备节点.tmpfile	= ext4_tmpfile,.rename2	= ext4_rename2,.setattr	= ext4_setattr,.setxattr	= generic_setxattr,.getxattr	= generic_getxattr,.listxattr	= ext4_listxattr,.removexattr	= generic_removexattr,.get_acl	= ext4_get_acl,.set_acl	= ext4_set_acl,.fiemap         = ext4_fiemap,
};

普通文件和目录都有一个对应的inode结构，在某个目录下创建一个文件，会调用目录对应的inode结构的ext4_create（）函数，ext4_create（）会调用__ext4_new_inode从磁盘上分配一个空闲的inode，同时初始化ext4内存中的inode结构。

创建一个子目录，使用函数ext4_mkdir，也会调用__ext4_new_inode从磁盘上分配一个空闲的inode，同时初始化ext4内存中的inode结构。

链接文件的inode_operations

普通文件的定位：找到direntry，然后从direntry中的文件名读出inode号，最后读出inode信息。

硬链接文件：因为硬链接文件的inode号与源文件一致的，所以定位比较简单。

软链接文件：需要特殊处理，由于软链接文件的inode号与目标文件不一致，所以需要先读取软链接自身的inode，从中获取目标文件的路径，再根据路径定位出目标文件。如果目标文件路径小于60字节，那么称为Fast Symbolic Link（快速符号链接），因为这样的话路径信息就直接保存在inode的i_block区域中，不需要额外的数据块了。

const struct inode_operations ext4_fast_symlink_inode_operations = {.readlink	= generic_readlink, 在inode的block中读出目标文件路径.follow_link    = ext4_follow_fast_link, 把对链接文件的操作直接转到目标文件.setattr	= ext4_setattr,.setxattr	= generic_setxattr,.getxattr	= generic_getxattr,.listxattr	= ext4_listxattr,.removexattr	= generic_removexattr,
};

目标文件路径大于60字节的，称为普通符号链接，需要根据inode中的数据块地址读出目标文件的路径：

const struct inode_operations ext4_symlink_inode_operations = {.readlink	= generic_readlink,.follow_link    = ext4_follow_link,.put_link       = ext4_put_link,.setattr	= ext4_setattr,.setxattr	= generic_setxattr,.getxattr	= generic_getxattr,.listxattr	= ext4_listxattr,.removexattr	= generic_removexattr,
};

多出一个put_link函数，且follow_link与快速链接不同，follow_link把inode地址对应的数据读出到内存解析，处理完成后由put_link来释放内存。

根据前面分析的ext4_fill_super函数中ext4_iget对文件和目录的i_fop赋值也是不一样的，下面继续分析i_fop：

文件的file_operations

const struct file_operations ext4_file_operations = {.llseek		= ext4_llseek, 调整文件读写指针.read_iter	= generic_file_read_iter,.write_iter	= ext4_file_write_iter,.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT.compat_ioctl	= ext4_compat_ioctl, 向设备文件发送ioctl命令
#endif.mmap		= ext4_file_mmap, .open		= ext4_file_open,.release	= ext4_release_file,.fsync		= ext4_sync_file,.splice_read	= generic_file_splice_read,.splice_write	= iter_file_splice_write,.fallocate	= ext4_fallocate,
};

read使用的是通用文件系统读函数generic_file_read_iter，支持IOCB_DIRECT直接读模式和缓存模式：IOCB_DIRECT模式在open文件时通过O_DIRECT标志指定，绕过缓存直接对磁盘进行读写操作；缓存模式在do_generic_file_read函数中体现，函数中会检查数据是否已经缓存，如果没有就会预读取，将数据加载入缓存页。

mmap函数把一个文件内容映射到进程的虚拟地址空间中（利用页表），这样可以通过内存指针p[n]来访问文件内容。

open函数打开文件操作，建立相关的内存管理结构，如inode对象。

release函数减少文件的引用计数，当引用计数为0时，会关闭文件对象，同时释放相关的内存管理结构。

fsync函数把内存中文件内容数据写入磁盘，splice_read/write用于管道操作。

目录的dir_operations

const struct file_operations ext4_dir_operations = {.llseek		= ext4_dir_llseek,.read		= generic_read_dir,.iterate	= ext4_readdir,.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT.compat_ioctl	= ext4_compat_ioctl,
#endif.fsync		= ext4_sync_file,.release	= ext4_release_dir,
};

read是一个空函数，为啥呢？因为目录不能通过read()系统调用来读取：generic_read_dir不读取任何数据，直接返回-EISDIR，目录内容需要通过下面的iterate（即readdir/getdents）来获取。

iterate是readdir函数，从目录的数据块中把目录项读出来，目录项direntry数据块中保存了目录下存储的文件名信息等。