前方用户提了个故障说手机发热低电下重启后发现有SD掉卡提示,Android_N/Kernel_3.18,离线日志找了半天,logcat日志里发现:

434   439 E vold    : Failed to pclose /system/bin/sgdisk --android-dump /dev/block/vold/disk:179:64 : Success
434 439 W vold : disk:179:64 has unknown partition table; trying entire device

代码在system/vold/Disk.cpp:

status_t Disk::readPartitions() {
...
// Ugly last ditch effort, treat entire disk as partition
if (table == Table::kUnknown || !foundParts) {
LOG(WARNING) << mId << " has unknown partition table; trying entire device";

std::string fsType;
std::string unused;
if (ReadMetadataUntrusted(mDevPath, fsType, unused, unused) == OK) {
createPublicVolume(mDevice);
} else {
LOG(WARNING) << mId << " failed to identify, giving up";
}
}
}

这个函数往上再看看:

status_t Disk::readPartitions() {
...
// Parse partition table

std::vector<std::string> cmd;
cmd.push_back(kSgdiskPath);
cmd.push_back("--android-dump");
cmd.push_back(mDevPath);

std::vector<std::string> output;
LOG(WARNING) << "ForkExe before";
status_t res = ForkExecvp(cmd, output);
if (res != OK) {
LOG(WARNING) << "sgdisk failed to scan " << mDevPath;
notifyEvent(ResponseCode::DiskScanned);
mJustPartitioned = false;
return res;
}

它是用sgdisk工具来分析分区表,只不过分析失败了,内核log里mmc1啥错也没有,不过有sgdisk权限出错:

[    4.064447] SELinux: inode_doinit_with_dentry:  context_to_sid(u:object_r:sgdisk_exec:s0) returned 12 for dev=dm-0 ino=666
[ 4.064656] type=1400 audit(1726218.699:6): avc: denied { execute } for pid=441 comm="sh" name="sgdisk" dev="dm-0" ino=666 scontext=u:r:vold:s0 tcontext=u:object_r:unlabeled:s0 tclass=file permissive=0

怎么会有sgdisk这个错,看了下正常的log里是没有的。我决定复现看看,在尝试了多次后竟然让我复现了,而且和低电量没有关系,复现以后拔出SD卡再插入仍然提示SD corrupted,dmesg多出来:

[  459.510818] mmc1: new ultra high speed SDR25 SDHC card at address 0001
[ 459.514104] mmcblk1: mmc1:0001 SD16G 14.4 GiB
[ 459.518791] mmcblk1: p1
[ 459.545324] type=1400 audit(1515436020.539:39): avc: denied { execute } for pid=4850 comm="sh" name="sgdisk" dev="dm-0" ino=666 scontext=u:r:vold:s0 tcontext=u:object_r:unlabeled:s0 tclass=file permissive=0

内核已经检测出来mmc1,不过sgdisk权限出错,我们再看下出错时sgdisk权限:

xxx:/ # ls -Z /system/bin/sgdisk
u:object_r:unlabeled:s0 /system/bin/sgdisk

sgdisk文件权限确是在system/sepolicy/file_contexts定义过的:

/system/bin/sgdisk      u:object_r:sgdisk_exec:s0
/system/bin/blkid u:object_r:blkid_exec:s0
/system/bin/tzdatacheck u:object_r:tzdatacheck_exec:s0

再看看其他的:

xxx:/ # ls -Z /system/bin/blkid
u:object_r:blkid_exec:s0 /system/bin/blkid
xxx:/ # ls -Z /system/bin/ | grep unlabel
u:object_r:unlabeled:s0 sgdisk
xxx:/ #

点背,只有sgdisk有问题,file_contexts是被编译到file_contexts.bin里的,参考Android.mk:

LOCAL_MODULE := file_contexts.bin
local_fc_files := $(LOCAL_PATH)/file_contexts

ok, sgdisk为什么没有权限明明已经定义,注意前面还有个内核态错误:

[    4.064447] SELinux: inode_doinit_with_dentry:  context_to_sid(u:object_r:sgdisk_exec:s0) returned 12 for dev=dm-0 ino=666

这是什么意思?暂且不管,我们先跟下代码在security/selinux/hooks.c:

1271 /* The inode's security attributes must be initialized before first use. */
1272 static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry)
1273 {
...
1377 } else {
1378 rc = security_context_to_sid_default(context, rc, &sid,
1379 sbsec->def_sid,
1380 GFP_NOFS);
1381 if (rc) {
1382 char *dev = inode->i_sb->s_id;
1383 unsigned long ino = inode->i_ino;
1384
1385 if (rc == -EINVAL) {
1386 if (printk_ratelimit())
1387 printk(KERN_NOTICE "SELinux: inode=%lu on dev=%s was found to have an invalid "
1388 "context=%s. This indicates you may need to relabel the inode or the "
1389 "filesystem in question.\n", ino, dev, context);
1390 } else {
1391 printk(KERN_WARNING "SELinux: %s: context_to_sid(%s) "
1392 "returned %d for dev=%s ino=%ld\n",
1393 __func__, context, -rc, dev, ino);
1394 }
1395 kfree(context);
1396 /* Leave with the unlabeled SID */
1397 rc = 0;
1398 break;
1399 }
1400 }
1401 kfree(context);
1402 isec->sid = sid;
1403 break;

1391行出错,continue to track:

1486 /**
1487 * security_context_to_sid_default - Obtain a SID for a given security context,
1488 * falling back to specified default if needed.
1489 *
1490 * @scontext: security context
1491 * @scontext_len: length in bytes
1492 * @sid: security identifier, SID
1493 * @def_sid: default SID to assign on error
1494 *
1495 * Obtains a SID associated with the security context that
1496 * has the string representation specified by @scontext.
1497 * The default SID is passed to the MLS layer to be used to allow
1498 * kernel labeling of the MLS field if the MLS field is not present
1499 * (for upgrading to MLS without full relabel).
1500 * Implicitly forces adding of the context even if it cannot be mapped yet.
1501 * Returns -%EINVAL if the context is invalid, -%ENOMEM if insufficient
1502 * memory is available, or 0 on success.
1503 */
1504 int security_context_to_sid_default(const char *scontext, u32 scontext_len,
1505 u32 *sid, u32 def_sid, gfp_t gfp_flags)
1506 {
1507 return security_context_to_sid_core(scontext, scontext_len,
1508 sid, def_sid, gfp_flags, 1);
1509 }

读下注释,return就是三个:0, -EINVAL, -ENOMEM。ENOMEM刚好是12。啥,insufficient memory? ENOMEM太多了,加了打印最后发现是在security/selinux/ss/sidtab.c:

198 int sidtab_context_to_sid(struct sidtab *s,
199 struct context *context,
200 u32 *out_sid)
201 {
...
206 *out_sid = SECSID_NULL;
207
208 sid = sidtab_search_cache(s, context);
209 if (!sid)
210 sid = sidtab_search_context(s, context);
211 if (!sid) {
212 spin_lock_irqsave(&s->lock, flags);
213 /* Rescan now that we hold the lock. */
214 sid = sidtab_search_context(s, context);
215 if (sid){
216 goto unlock_out;
217 }
218 /* No SID exists for the context. Allocate a new one. */
219 if (s->next_sid == UINT_MAX || s->shutdown) {
220 ret = -ENOMEM;
221 printk(KERN_INFO
222 "SELinux: next_sid=%d, shutdown=%d ret=%d.\n",
223 s->next_sid, s->shutdown, ret);
224 goto unlock_out;
225 }
226 sid = s->next_sid++;
227 if (context->len)
228 printk(KERN_INFO
229 "SELinux: Context %s is not valid (left unmapped).\n",
230 context->str);
231 ret = sidtab_insert(s, sid, context);
232 if (ret)
233 s->next_sid--;
234 unlock_out:
235 spin_unlock_irqrestore(&s->lock, flags);
236 }
237
238 if (ret){
239 printk("return ret is %d\n", ret);
240 return ret;
241 }
242
243 *out_sid = sid;
244 return 0;
245 }

s->shutdown = 1,这个才是关键,这个为啥是no memory? 查看代码发现只有sidtab_shutdown会设置为1:

317 void sidtab_shutdown(struct sidtab *s)
318 {
319 unsigned long flags;
320
321 spin_lock_irqsave(&s->lock, flags);
322 s->shutdown = 1;
323 spin_unlock_irqrestore(&s->lock, flags);
324 }

那我们看下sidtab_shutdown call stack:

sel_write_load -> security_load_policy -> sidtab_shutdown
static const struct file_operations sel_load_ops = {
.write = sel_write_load,
.llseek = generic_file_llseek,
};

可以看出来sel_write_load应该是由app通过write接口触发的,具体是在external/libselinux, call stack:

selinux_android_load_policy -> selinux_android_load_policy_helper -> security_load_policy -> write( "%s/load"

在它的上面init在call:

init main -> selinux_initialize -> selinux_android_load_policy(in kernel_domain)

回过头来再细看下kernel的security_load_policy:

sel_write_load :
|-> copy_from_user
|-> security_load_policy
|-> policydb_read -> avtab_read -> avtab_alloc : print "SELinux: %d avtab hash slots, %d rules"
|-> sidtab_shutdown (set shutdown=1)
|-> sidtab_set (set shutdown=0)
|-> print "policy loaded"

也就是说在vold执行sgdisk时,因为shutdown=1导致内核selinux报错,我们再看下报错log:

[    4.064447] SELinux: inode_doinit_with_dentry:  context_to_sid(u:object_r:sgdisk_exec:s0) returned 12 for dev=dm-0 ino=666

这里的sgdisk的context应该已经识别到了,就是sgdisk_exec,只不过在内核处理to sid时出错了,导致最终写到内核失败。

sgdisk的权限在file_contexts.bin里,可以看下logcat:

01-07 12:08:33.715  1860  1860 I auditd  : type=1400 audit(0.0:19): avc: denied { execute } for comm="sh" name="sgdisk" dev="dm-0" ino=666 scontext=u:r:vold:s0 tcontext=u:object_r:unlabeled:s0 tclass=file permissive=0
01-07 12:08:31.346 1459 1613 I SELinux : SELinux: Loaded file_contexts contexts from /file_contexts.bin.

能看到,在12:08:31.346时file_contexts.bin已经loaded了,而权限报错是在2s后发生的,所以和load file_contexts时间无关,问题就是shutdown=1为什么会抛出来。

我在shutdown的地方加些打印,发现security_load_policy走了2遍,一是在init first stage,一个是在init second stage,first stage是load的/sepolicy文件,second stage是load的/sepolicy_B。

从加的打印能看出来,在load sepolicy_B流程中shutdown=1后面会shutdown=0,而vold刚好踩在这个中间,这也证明了为啥偶尔会出现这个问题:

[    4.090567] SELinux:  Class can_socket not defined in policy.
[ 4.090574] SELinux: the above unknown classes and permissions will be denied
[ 4.090579] shutdown=1 in sidtab_shutdown
[ 4.091293] SELinux: next_sid=294, shutdown=1 ret=-12.
[ 4.091305] SELinux: inode_doinit_with_dentry: context_to_sid(u:object_r:sgdisk_exec:s0) returned 12 for dev=dm-0 ino=666
[ 4.091391] shutdown=0 in sidtab_set
[ 4.091491] type=1400 audit(4328788.739:6): avc: denied { execute } for pid=444 comm="sh" name="sgdisk" dev="dm-0" ino=666 scontext=u:r:vold:s0 tcontext=u:object_r:unlabeled:s0 tclass=file permissive=0
[ 4.147648] type=1403 audit(4328788.789:7): policy loaded auid=4294967295 ses=4294967295

sepolicy_B是我们自己添加的,原生流程只有sepolicy是在kernel domain完成。是不是有问题?带着这个问题我们重看下init main:

static void selinux_initialize(bool in_kernel_domain) {
Timer t;

selinux_callback cb;
cb.func_log = selinux_klog_callback;
selinux_set_callback(SELINUX_CB_LOG, cb);
cb.func_audit = audit_callback;
selinux_set_callback(SELINUX_CB_AUDIT, cb);

if (in_kernel_domain) {
ERROR("Loading SELinux policy...\n");
if (selinux_android_load_policy() < 0) {
ERROR("failed to load policy: %s\n", strerror(errno));
security_failure();
}

bool kernel_enforcing = (security_getenforce() == 1);
bool is_enforcing = selinux_is_enforcing();
if (kernel_enforcing != is_enforcing) {
if (security_setenforce(is_enforcing)) {
ERROR("security_setenforce(%s) failed: %s\n",
is_enforcing ? "true" : "false", strerror(errno));
security_failure();
}
}

if (write_file("/sys/fs/selinux/checkreqprot", "0") == -1) {
security_failure();
}

NOTICE("(Initializing SELinux %s took %.2fs.)\n",
is_enforcing ? "enforcing" : "non-enforcing", t.duration());
} else {
ERROR("init all handles...\n");
selinux_init_all_handles();
}
}

int main(int argc, char** argv) {
...
NOTICE("init %s started!\n", is_first_stage ? "first stage" : "second stage");
...
// Set up SELinux, including loading the SELinux policy if we're in the kernel domain.
selinux_initialize(is_first_stage);
}

也就是selinux的初始化分两部分,一是在first stage完成sepolicy加载,二是在second stage load file_contexts.bin等,这是本来的流程。而我们在second stage又添加了一个sepolicy加载,这个会把内核的load policy流程在用户态进程已经运行的时候再走一遍,比如期间shutdown=1就会干扰用户态进程的权限检查,应该是错误的,那我去掉这个sepolicy_B,测了多遍,故障消失:]

把这个sepolicy_B放到first stage加载应该也能解决问题,没测了。