Android Go data分区启用了f2fs文件系统,从介绍看是为了提高life time和4K文件读写性能,这个暂不关心,三星init,看kernel提交记录后面主要是huawei也参与进来,据说是重金聘用,多少米?

前段时间项目在Go上遇到一个问题:data填满100%一直起不来(kernel: 3.18)。不过可以使用保留空间加一层保护, 重要系统进程可以使用保留区,其他不可用, rt?,ext4这个特性叫resgid,3.18 f2fs还不支持,我从kernel 4.9移下来,基于这份代码分析。

先看下保留空间相关结构:

struct f2fs_sb_info {
...
block_t reserved_blocks; /* configurable reserved blocks */
block_t current_reserved_blocks; /* current reserved blocks */
block_t root_reserved_blocks; /* root reserved blocks */
kuid_t s_resuid; /* reserved blocks for uid */
kgid_t s_resgid; /* reserved blocks for gid */

其实reserved_blocks、current_reserved_blocks是一个reserved feature,主要目的应该是提高性能,在sysfs下可配置。而root_reserved_blocks、s_resuid、s_resgid才是我们现在这个需求,用man来解释:

man mount:

resgid=n and resuid=n
The ext2 filesystem reserves a certain percentage of the available space (by default 5%, see mke2fs(8) and tune2fs(8)). These options determine who can use the reserved blocks.
(Roughly: whoever has the specified uid, or belongs to the specified group.)

man tune2fs:

-r reserved-blocks-count
Set the number of reserved filesystem blocks.

-g group
Set the group which can use the reserved filesystem blocks. The group parameter can be a numerical gid or a group name. If a group name is given, it is converted to a numerical gid before it is stored in the superblock.

ok, 关键函数:

/*
 * Decide whether the calling task may use the root-reserved blocks.
 *
 * Returns false when the RESERVE_ROOT mount option is not set. Otherwise
 * returns true if the task has CAP_SYS_RESOURCE, if its fsuid equals
 * sbi->s_resuid, or if sbi->s_resgid is not GLOBAL_ROOT_GID and
 * in_group_p() reports membership in it.
 */
static inline bool __allow_reserved_blocks(struct f2fs_sb_info *sbi)
{
	bool group_may_use;

	/* Without the RESERVE_ROOT mount option nobody gets the reserve. */
	if (!test_opt(sbi, RESERVE_ROOT))
		return false;

	/* Capability holders and the configured resuid always qualify. */
	if (capable(CAP_SYS_RESOURCE) ||
	    uid_eq(sbi->s_resuid, current_fsuid()))
		return true;

	/*
	 * A resgid equal to GLOBAL_ROOT_GID never grants access through
	 * group membership; any other resgid is checked with in_group_p().
	 */
	group_may_use = !gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) &&
			in_group_p(sbi->s_resgid);
	return group_may_use;
}

首先检查mount时是否带了RESERVE_ROOT选项,如果没设置就不让用保留区。

/* Set the F2FS_MOUNT_<option> bit(s) in sbi->mount_opt.opt. */
#define set_opt(sbi, option)	((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
/* Evaluate to sbi->mount_opt.opt masked with F2FS_MOUNT_<option> (non-zero when set). */
#define test_opt(sbi, option) ((sbi)->mount_opt.opt & F2FS_MOUNT_##option)

那么这个选项是在哪里set_opt的呢?在parse_options里:

/* Excerpt from parse_options(): handling of the reserve_root mount option. */
case Opt_reserve_root:
/* Reject the option when its argument does not parse as an integer. */
if (args->from && match_int(args, &arg))
return -EINVAL;
if (test_opt(sbi, RESERVE_ROOT)) {
/* RESERVE_ROOT is already set: keep the existing value and only log it. */
f2fs_msg(sb, KERN_INFO,
"Preserve previous reserve_root=%u",
sbi->root_reserved_blocks);
} else { // tj: this branch is taken (RESERVE_ROOT not set yet)
/* Record the requested block count and enable the option. */
sbi->root_reserved_blocks = arg;
set_opt(sbi, RESERVE_ROOT);
}

mount会解析这个选项:

/*
 * Delegates the mount to mount_bdev(), forwarding fs_type, flags, dev_name
 * and data unchanged and passing f2fs_fill_super as the callback argument.
 * Returns whatever mount_bdev() returns.
 */
static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data)
{
return mount_bdev(fs_type, flags, dev_name, data, f2fs_fill_super);
}

f2fs_fill_super会call parse_options:

static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
{
...
/* parse mount options */
options = kstrdup((const char *)data, GFP_KERNEL);
if (data && !options) {
err = -ENOMEM;
goto free_sb_buf;
}

err = parse_options(sb, options);
if (err)
goto free_options;

ok, 看下s_resgid allow:

if (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) &&
in_group_p(sbi->s_resgid))
return true;
#define KGIDT_INIT(value) (kgid_t){ value }
#define GLOBAL_ROOT_GID KGIDT_INIT(0)

s_resgid为0(GLOBAL_ROOT_GID)时不会走组匹配这条路,也就是说不能靠resgid=0来使用保留区。再看in_group_p:

/*
 * Return 1 when grp equals the fsgid of the current credentials; otherwise
 * return the result of groups_search() over the credentials' group_info.
 */
int in_group_p(kgid_t grp)
{
	const struct cred *cred = current_cred();

	/* Fast path: grp matches the task's fsgid. */
	if (gid_eq(grp, cred->fsgid))
		return 1;

	/* Otherwise the answer comes from the group_info lookup. */
	return groups_search(cred->group_info, grp);
}

in_group_p检查的是当前进程:fsgid等于grp,或者grp能在其group_info(附加组)里找到,就返回非0。另外可见,mount选项要加了reserve_root,保留区才会生效。

另外,保留块有大小限制:

static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
{
block_t limit = (sbi->user_block_count << 1) / 1000;

/* limit is 0.2% */
if (test_opt(sbi, RESERVE_ROOT) && sbi->root_reserved_blocks > limit) {
sbi->root_reserved_blocks = limit;
f2fs_msg(sbi->sb, KERN_INFO,
"Reduce reserved blocks for root = %u",
sbi->root_reserved_blocks);
}
if (!test_opt(sbi, RESERVE_ROOT) &&
(!uid_eq(sbi->s_resuid,
make_kuid(&init_user_ns, F2FS_DEF_RESUID)) ||
!gid_eq(sbi->s_resgid,
make_kgid(&init_user_ns, F2FS_DEF_RESGID))))
f2fs_msg(sbi->sb, KERN_INFO,
"Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
from_kuid_munged(&init_user_ns, sbi->s_resuid),
from_kgid_munged(&init_user_ns, sbi->s_resgid));
}

关于id,Android里叫AID,在system/core/libcutils/include/private/android_filesystem_config.h:

#define AID_RESERVED_DISK 1065   /* GID that has access to reserved disk space */

比如zygote能使用reserved disk,在32bits系统里在rootdir/init.zygote32.rc加入reserved_disk,如下:

service zygote /system/bin/app_process -Xzygote /system/bin --zygote --start-system-server
class main
priority -20
user root
group root readproc reserved_disk

可以用id命令显示uid/gid等信息:

xxx:/ # ps -A | grep system_server
USER PID PPID VSZ RSS WCHAN ADDR S NAME
system 2888 286 1127972 73976 SyS_epoll_wait b008a658 S system_server
xxx:/ #
xxx:/ # id system
uid=1000(system) gid=1000(system) groups=1000(system), context=u:r:su:s0
xxx:/ #
xxx:/ # id reserved_disk
uid=1065(reserved_disk) gid=1065(reserved_disk) groups=1065(reserved_disk), context=u:r:su:s0

Done.