Android Go data分区启用了f2fs文件系统,从介绍看是为了提高life time和4K文件读写性能,这个暂不关心,三星init,看kernel提交记录后面主要是huawei也参与进来,据说是重金聘用,多少米?

前段时间项目在Go上遇到一个问题:data填满100%一直起不来(kernel: 3.18)。不过可以使用保留空间加一层保护, 重要系统进程可以使用保留区,其他不可用, rt?,ext4这个特性叫resgid,3.18 f2fs还不支持,我从kernel 4.9移下来,基于这份代码分析。

先看下保留空间相关结构:

struct f2fs_sb_info {
...
    block_t reserved_blocks;        /* configurable reserved blocks */
    block_t current_reserved_blocks;    /* current reserved blocks */
    block_t root_reserved_blocks;        /* root reserved blocks */
    kuid_t s_resuid;            /* reserved blocks for uid */
    kgid_t s_resgid;            /* reserved blocks for gid */

其实reserved_blocks和current_reserved_blocks是一个reserved feature,主要目的应该是提高性能,在sysfs下可配置。而root_reserved_blocks、s_resuid、s_resgid才是我们现在这个需求,用man来解释:

man mount:

resgid=n and resuid=n
The ext2 filesystem reserves a certain percentage of the available space (by default 5%, see mke2fs(8) and tune2fs(8)). These options determine who can use the reserved blocks.
(Roughly: whoever has the specified uid, or belongs to the specified group.)

man tune2fs:

-r reserved-blocks-count
Set the number of reserved filesystem blocks.

-g group
Set the group which can use the reserved filesystem blocks. The group parameter can be a numerical gid or a group name. If a group name is given, it is converted to a numerical gid before it is stored in the superblock.

ok, 关键函数:

/*
 * Decide whether the calling task may use the root-reserved blocks of @sbi.
 *
 * Returns false outright when the RESERVE_ROOT mount option is not set.
 * Otherwise returns true if any one of the following holds:
 *   - the caller has CAP_SYS_RESOURCE;
 *   - the caller's fsuid equals sbi->s_resuid;
 *   - sbi->s_resgid is not the root gid and in_group_p() accepts it.
 * Returns false in every other case.
 */
static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi)
{
    /* Feature is opt-in: without RESERVE_ROOT nobody gets the reserve. */
    if (!test_opt(sbi, RESERVE_ROOT))
        return false;
    if (capable(CAP_SYS_RESOURCE))
        return true;
    if (uid_eq(sbi->s_resuid, current_fsuid()))
        return true;
    /* A resgid equal to GLOBAL_ROOT_GID never grants access via the gid path. */
    if (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) &&
                    in_group_p(sbi->s_resgid))
        return true;
    return false;
}

首先检查mount是否有RESERVE_ROOT选项,如果没设置就不让用。

/*
 * Mount-option bit helpers. @option is the suffix of an F2FS_MOUNT_* flag
 * (token-pasted), e.g. set_opt(sbi, RESERVE_ROOT) uses F2FS_MOUNT_RESERVE_ROOT.
 * set_opt ORs the flag into sbi->mount_opt.opt; test_opt evaluates to the
 * masked bit value (non-zero when set), not a normalized 0/1.
 */
#define set_opt(sbi, option)    ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
#define test_opt(sbi, option)    ((sbi)->mount_opt.opt & F2FS_MOUNT_##option)

哪里set_opt了这个了, 在parse_options里:

        case Opt_reserve_root:
            if (args->from && match_int(args, &arg))
                return -EINVAL;
            if (test_opt(sbi, RESERVE_ROOT)) {
                f2fs_msg(sb, KERN_INFO,
                    "Preserve previous reserve_root=%u",
                    sbi->root_reserved_blocks);
            } else { //tj: 走这里
                sbi->root_reserved_blocks = arg;
                set_opt(sbi, RESERVE_ROOT);
            }

mount会解析这个选项:

/*
 * Mount entry point for f2fs: forwards all arguments to mount_bdev() and
 * supplies f2fs_fill_super as the superblock fill callback. @data carries
 * the mount option string that f2fs_fill_super later hands to parse_options().
 */
static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
            const char *dev_name, void *data)
{
    return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
}

f2fs_fill_super会call parse_options:

static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
{
...
    /* parse mount options */
    options = kstrdup((const char *)data, GFP_KERNEL);
    if (data && !options) {
        err = -ENOMEM;
        goto free_sb_buf;
    }

    err = parse_options(sb, options);
    if (err)
        goto free_options;

ok, 看下s_resgid allow:

    if (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) &&
                    in_group_p(sbi->s_resgid))
        return true;
#define KGIDT_INIT(value) (kgid_t){ value }
#define GLOBAL_ROOT_GID KGIDT_INIT(0)

resgid为0(即GLOBAL_ROOT_GID)时不会通过gid这条路径放行(只能靠CAP_SYS_RESOURCE或resuid),否则再看in_group_p:

/*
 * Check whether the current task belongs to group @grp.
 *
 * Uses the current task's credentials: returns 1 when @grp equals the
 * task's fsgid; otherwise returns whatever groups_search() reports for
 * @grp in the task's supplementary group list (cred->group_info) --
 * presumably non-zero when found; confirm against groups_search().
 */
int in_group_p(kgid_t grp)
{
        const struct cred *cred = current_cred();
        int retval = 1;

        /* Only consult the supplementary groups when fsgid does not match. */
        if (!gid_eq(grp, cred->fsgid))
                retval = groups_search(cred->group_info, grp);
        return retval;
}

in_group_p检查的是当前进程的凭证:grp等于当前进程的fsgid,或者在它的附加组(group_info)里,就返回真。也就是说进程必须属于resgid这个组才能使用保留块;另外可见mount时要加了reserve_root选项才能激活。

另外,保留块有大小限制:

/*
 * Sanity-check the root-reserve settings of @sbi after option parsing.
 *
 *  - With RESERVE_ROOT set: clamp sbi->root_reserved_blocks to at most
 *    2/1000 (0.2%) of sbi->user_block_count and log the reduced value.
 *  - Without RESERVE_ROOT: if s_resuid or s_resgid differs from the
 *    defaults (F2FS_DEF_RESUID / F2FS_DEF_RESGID), log that they are
 *    ignored; no field is modified in this case.
 */
static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
{
    /*
     * (count << 1) / 1000 == count * 2 / 1000.
     * NOTE(review): the shift is done in block_t before the division;
     * confirm block_t is wide enough for very large user_block_count.
     */
    block_t limit = (sbi->user_block_count << 1) / 1000;

    /* limit is 0.2% */
    if (test_opt(sbi, RESERVE_ROOT) && sbi->root_reserved_blocks > limit) {
        sbi->root_reserved_blocks = limit;
        f2fs_msg(sbi->sb, KERN_INFO,
            "Reduce reserved blocks for root = %u",
                sbi->root_reserved_blocks);
    }
    /* resuid/resgid were given but have no effect without reserve_root. */
    if (!test_opt(sbi, RESERVE_ROOT) &&
        (!uid_eq(sbi->s_resuid,
                make_kuid(&init_user_ns, F2FS_DEF_RESUID)) ||
        !gid_eq(sbi->s_resgid,
                make_kgid(&init_user_ns, F2FS_DEF_RESGID))))
        f2fs_msg(sbi->sb, KERN_INFO,
            "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
                from_kuid_munged(&init_user_ns, sbi->s_resuid),
                from_kgid_munged(&init_user_ns, sbi->s_resgid));
}

关于id,Android里叫AID,在system/core/libcutils/include/private/android_filesystem_config.h:

#define AID_RESERVED_DISK 1065   /* GID that has access to reserved disk space */

比如zygote能使用reserved disk,在32bits系统里在rootdir/init.zygote32.rc加入reserved_disk,如下:

service zygote /system/bin/app_process -Xzygote /system/bin --zygote --start-system-server
    class main
    priority -20
    user root
    group root readproc reserved_disk

可以用id命令显示uid/gid等信息:

xxx:/ # ps -A | grep system_server
USER           PID  PPID     VSZ    RSS WCHAN            ADDR S NAME
system        2888   286 1127972  73976 SyS_epoll_wait b008a658 S system_server
xxx:/ #
xxx:/ # id system
uid=1000(system) gid=1000(system) groups=1000(system), context=u:r:su:s0
xxx:/ #
xxx:/ # id reserved_disk
uid=1065(reserved_disk) gid=1065(reserved_disk) groups=1065(reserved_disk), context=u:r:su:s0

Done.