linux mount过程

xiaoxiao2021-03-25 109

接下来我们以 ext4 文件系统的 mount 过程为例，讲解文件系统几种数据结构之间的关联。如果 Linux 内核启用了 ext4 文件系统支持，那么在内核初始化时会调用 static int __init ext4_init_fs(void)，该函数通过 register_filesystem(&ext4_fs_type) 把 ext4 文件系统注册到全局的 file_systems 链表中。注册之后内核就可以识别此文件系统；当要使用 ext4 时，通过 mount 系统调用加载 ext4 的 super block、inode 等信息，

之后就可以进行ext4读写了。

SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, char __user *, type, unsigned long, flags, void __user *, data) { int ret; char *kernel_type; struct filename *kernel_dir; char *kernel_dev; unsigned long data_page; //复制文件系统类型名到内核 ret = copy_mount_string(type, &kernel_type); if (ret < 0) goto out_type; //得到挂载点路径名 kernel_dir = getname(dir_name); if (IS_ERR(kernel_dir)) { ret = PTR_ERR(kernel_dir); goto out_dir; } //获取文件系统所在设备名，如/dev/sda1 ret = copy_mount_string(dev_name, &kernel_dev); if (ret < 0) goto out_dev; //获取挂载options信息 ret = copy_mount_options(data, &data_page); if (ret < 0) goto out_data; //mount主体函数 ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags, (void *) data_page); free_page(data_page); out_data: kfree(kernel_dev); out_dev: putname(kernel_dir); out_dir: kfree(kernel_type); out_type: return ret; } //先检查挂载参数，之后调用不同的mount函数 long do_mount(const char *dev_name, const char *dir_name, const char *type_page, unsigned long flags, void *data_page) { struct path path; int retval = 0; int mnt_flags = 0; printk(KERN_ERR "dev_name:%s dir_name:%s \n",dev_name,dir_name); /* Discard magic */ if ((flags & MS_MGC_MSK) == MS_MGC_VAL) flags &= ~MS_MGC_MSK; /* Basic sanity checks */ if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) return -EINVAL; if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; /* ... 
and get the mountpoint */ //解析dir_name获取挂载路径 retval = kern_path(dir_name, LOOKUP_FOLLOW, &path); if (retval) return retval; //挂载安全性检测 retval = security_sb_mount(dev_name, &path, type_page, flags, data_page); if (!retval && !may_mount()) retval = -EPERM; if (retval) goto dput_out; /* Default to relatime unless overriden */ if (!(flags & MS_NOATIME)) mnt_flags |= MNT_RELATIME; /* Separate the per-mountpoint flags */ if (flags & MS_NOSUID) mnt_flags |= MNT_NOSUID; if (flags & MS_NODEV) mnt_flags |= MNT_NODEV; if (flags & MS_NOEXEC) mnt_flags |= MNT_NOEXEC; if (flags & MS_NOATIME) mnt_flags |= MNT_NOATIME; if (flags & MS_NODIRATIME) mnt_flags |= MNT_NODIRATIME; if (flags & MS_STRICTATIME) mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME); if (flags & MS_RDONLY) mnt_flags |= MNT_READONLY; /* The default atime for remount is preservation */ if ((flags & MS_REMOUNT) && ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME | MS_STRICTATIME)) == 0)) { mnt_flags &= ~MNT_ATIME_MASK; mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK; } flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | MS_STRICTATIME); if (flags & MS_REMOUNT) retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, data_page); else if (flags & MS_BIND) retval = do_loopback(&path, dev_name, flags & MS_REC); else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) retval = do_change_type(&path, flags); else if (flags & MS_MOVE) retval = do_move_mount(&path, dev_name); else retval = do_new_mount(&path, type_page, flags, mnt_flags, dev_name, data_page); dput_out: path_put(&path); return retval; } /* 对于一个新的文件系统初次挂载会调用do_new_mount，这个函数会先给这个文件系统创建一个struct mount结构，调用文件系统特有的mount函数，最后将struct mount加入到全局文件树中 */ static int do_new_mount(struct path *path, const char *fstype, int flags, int mnt_flags, const char *name, void *data) { struct file_system_type *type; struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; struct vfsmount 
*mnt; int err; if (!fstype) return -EINVAL; //通过name获取文件系统类型 type = get_fs_type(fstype); if (!type) return -ENODEV; printk(KERN_ERR "fs type:%s\n",type->name); if (user_ns != &init_user_ns) { if (!(type->fs_flags & FS_USERNS_MOUNT)) { put_filesystem(type); return -EPERM; } /* Only in special cases allow devices from mounts * created outside the initial user namespace. */ if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) { flags |= MS_NODEV; mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } } //获取struct mount结构,调用特定文件系统mount函数，主要填充super block数据 mnt = vfs_kern_mount(type, flags, name, data); //有子文件系统 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && !mnt->mnt_sb->s_subtype) mnt = fs_set_subtype(mnt, fstype); put_filesystem(type); if (IS_ERR(mnt)) return PTR_ERR(mnt); //将mount加入到全局文件树中 err = do_add_mount(real_mount(mnt), path, mnt_flags); if (err) mntput(mnt); return err; } struct vfsmount * vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { struct mount *mnt; struct dentry *root; if (!type) return ERR_PTR(-ENODEV); //分配并初始化struct mount 结构 mnt = alloc_vfsmnt(name); if (!mnt) return ERR_PTR(-ENOMEM); if (flags & MS_KERNMOUNT) mnt->mnt.mnt_flags = MNT_INTERNAL; //调用具体文件系统的mount函数 root = mount_fs(type, flags, name, data); if (IS_ERR(root)) { free_vfsmnt(mnt); return ERR_CAST(root); } //初始化mnt变量，并将mnt加入超级块s_mounts链表中 mnt->mnt.mnt_root = root; mnt->mnt.mnt_sb = root->d_sb; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; br_write_lock(&vfsmount_lock); list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); br_write_unlock(&vfsmount_lock); return &mnt->mnt; } struct dentry * mount_fs(struct file_system_type *type, int flags, const char *name, void *data) { struct dentry *root; struct super_block *sb; char *secdata = NULL; int error = -ENOMEM; if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { secdata = alloc_secdata(); if (!secdata) goto out; error = security_sb_copy_data(data, secdata); if (error) goto out_free_secdata; 
} //具体文件系统的mount函数，比如ext4，该函数就是系统初始化时注册的ext4_fs_type 里面的mount root = type->mount(type, flags, name, data);//返回mount后的denty if (IS_ERR(root)) { error = PTR_ERR(root); goto out_free_secdata; } sb = root->d_sb; BUG_ON(!sb); WARN_ON(!sb->s_bdi); WARN_ON(sb->s_bdi == &default_backing_dev_info); sb->s_flags |= MS_BORN; error = security_sb_kern_mount(sb, flags, secdata); if (error) goto out_sb; /* * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE * but s_maxbytes was an unsigned long long for many releases. Throw * this warning for a little while to try and catch filesystems that * violate this rule. */ WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " "negative value (%lld)\n", type->name, sb->s_maxbytes); up_write(&sb->s_umount); free_secdata(secdata); return root; out_sb: dput(root); deactivate_locked_super(sb); out_free_secdata: free_secdata(secdata); out: return ERR_PTR(error); } //newmnt: 新创建的挂载实例 path:挂载路径 static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) { struct mountpoint *mp; struct mount *parent; int err; mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); //这里不是简单的加锁，如果path上挂载了很多文件系统，那么这里就是要找出最新一次挂载到其上的文件系统的根路径，这才//是我们这个文件系统要挂载到的mountpoint mp = lock_mount(path); if (IS_ERR(mp)) return PTR_ERR(mp); parent = real_mount(path->mnt);//得到挂载点所属的挂载结构 err = -EINVAL; if (unlikely(!check_mnt(parent))) { /* that's acceptable only for automounts done in private ns */ if (!(mnt_flags & MNT_SHRINKABLE)) goto unlock; /* ... 
and for those we'd better have mountpoint still alive */ if (!parent->mnt_ns) goto unlock; } /* Refuse the same filesystem on the same mount point */ err = -EBUSY; //禁止同一个文件系统挂在到同一个挂载点 if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb && path->mnt->mnt_root == path->dentry) goto unlock; err = -EINVAL; if (S_ISLNK(newmnt->mnt.mnt_root->d_inode->i_mode)) goto unlock; newmnt->mnt.mnt_flags = mnt_flags; //把newmnt加入到全局文件系统树中 err = graft_tree(newmnt, parent, mp); unlock: unlock_mount(mp); return err; } static struct mountpoint *lock_mount(struct path *path) { struct vfsmount *mnt; struct dentry *dentry = path->dentry; retry: mutex_lock(&dentry->d_inode->i_mutex); if (unlikely(cant_mount(dentry))) { mutex_unlock(&dentry->d_inode->i_mutex); return ERR_PTR(-ENOENT); } namespace_lock(); mnt = lookup_mnt(path); if (likely(!mnt)) {//这里表示dentry上未挂载文件系统，创建一个新的mountpoint 返回 struct mountpoint *mp = new_mountpoint(dentry); if (IS_ERR(mp)) { namespace_unlock(); mutex_unlock(&dentry->d_inode->i_mutex); return mp; } return mp; } namespace_unlock(); mutex_unlock(&path->dentry->d_inode->i_mutex); path_put(path); // 如果lookup_mnt没有返回NULL，则说明它找到了挂载在/mnt上的子文件系统，下面的逻辑是： // 把子文件系统的mount结构赋值给path->mnt path->mnt = mnt; //如果此dentry之前挂载了文件系统，则新的dentry将为子文件系统mnt的挂载点 dentry = path->dentry = dget(mnt->mnt_root); // 返回到lookup_mnt函数，用新的path变量继续查找是否还有后续的子文件系统 //这样组成的list结构:p->C1->C2->C3，从全局来看后挂载的会覆盖之前挂载的文件系统 goto retry; } //参数为挂载点所属的挂载实例跟目录项,dir为移动方向 /* 路径名查找时都会调用到这个函数，它的作用就是根据一个父<mount, dentry> 二元组找到挂载在其下面的子文件系统的mount实例，如果没找到就返回NULL */ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, int dir) { struct list_head *head = mount_hashtable + hash(mnt, dentry); struct list_head *tmp = head; struct mount *p, *found = NULL; for (;;) { tmp = dir ? 
tmp->next : tmp->prev; p = NULL; if (tmp == head)//循环一圈未找到 break; p = list_entry(tmp, struct mount, mnt_hash);//mnt_hash 链接到mount_hashtable if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) {//p其实是参数mnt的子文件系统 found = p; break; } } return found; }

转载请注明原文地址: https://ju.6miu.com/read-15036.html

技术

最新回复(0)