问题

Android 10.0 + QCOM平台 + uefi,刷完机屏幕提示设备损坏然后进入了fastboot,屏幕提示如下:

Your device is corrupt. It can't be trusted and will not boot...

一上来会认为刷机有问题?有时候,你的眼睛背叛了你的心。

分析

首先,获取串口log,主要部分:

Active Slot _a is bootable, retry count 5
Booting from slot (_a)
Booting Into Mission Mode
Load Image vbmeta_a total time: 2 ms
Load Image vbmeta_system_a total time: 1 ms
avb_slot_verify.c:657: ERROR: vbmeta_system_a: Image rollback index is less than the stored rollback index.
Load Image boot_a total time: 43 ms
Load Image dtbo_a total time: 4 ms
No bootable slots found enter fastboot mode
VB2: boot state: red(3)
//重启
...
Slot _a is unbootable, trying alternate slot
Err: line:1603 FindBootableSlot() status: Load Error
Err: line:1386 LoadImageAndAuth() status: Load Error
LoadImageAndAuth failed: Load Error
Launching fastboot

有两个问题,一是device corrupt,一是进了fastboot。ok,我们先看avb的一个错误:

avb_slot_verify.c:657: ERROR: vbmeta_system_a: Image rollback index is less than the stored rollback index.

一开始没留意到这个错误,因为正常启动时也会有其他avb错误(印象中是有的),所以忽略了这个关键问题。这也告诉我们:该用error级别的才打error,该打warning的就打warning,不要乱打,尤其是release版本,否则会误导debug。言归正传,看代码:

static AvbSlotVerifyResult load_and_verify_vbmeta(
AvbOps* ops,
const char* const* requested_partitions,
const char* ab_suffix,
bool allow_verification_error,
AvbVBMetaImageFlags toplevel_vbmeta_flags,
int rollback_index_location,
const char* partition_name,
size_t partition_name_len,
const uint8_t* expected_public_key,
size_t expected_public_key_length,
AvbSlotVerifyData* slot_data,
AvbAlgorithmType* out_algorithm_type) {
...
if (vbmeta_header.rollback_index < stored_rollback_index) {
avb_errorv(
full_partition_name,
": Image rollback index is less than the stored rollback index.\n", //tj: here
NULL);
ret = AVB_SLOT_VERIFY_RESULT_ERROR_ROLLBACK_INDEX;
if (!allow_verification_error) {
goto out;
}
}
...
out:
/* If |vbmeta_image_data| isn't NULL it means that it adopted
* |vbmeta_buf| so in that case don't free it here.
*/
if (vbmeta_image_data == NULL) {
if (vbmeta_buf != NULL) {
avb_free(vbmeta_buf);
}
}
if (descriptors != NULL) {
avb_free(descriptors);
}
return ret;
}

这里的allow_verification_error对应设备是否unlock,我们是lock状态,所以直接goto out了,此时ret = AVB_SLOT_VERIFY_RESULT_ERROR_ROLLBACK_INDEX。这个涉及到avb secure相关,先略过。

继续看:

AvbSlotVerifyResult avb_slot_verify(AvbOps* ops,
...
ret = load_and_verify_vbmeta(ops,
requested_partitions,
ab_suffix,
allow_verification_error,
0 /* toplevel_vbmeta_flags */,
0 /* rollback_index_location */,
"vbmeta",
avb_strlen("vbmeta"),
NULL /* expected_public_key */,
0 /* expected_public_key_length */,
slot_data,
&algorithm_type);
if ((!allow_verification_error && ret != AVB_SLOT_VERIFY_RESULT_OK) ||
!result_should_continue(ret)) {
goto fail;
}
...
fail:
if (slot_data != NULL) {
avb_slot_verify_data_free(slot_data);
}
return ret;

这里会goto fail:fail分支会释放slot_data然后直接返回ret,不会把它交还给调用方,所以调用方的SlotData仍然是NULL。当前配置是avb 2.0,调用方是LoadImageAndAuthVB2:

STATIC EFI_STATUS
LoadImageAndAuthVB2 (BootInfo *Info)
{
...
Result = avb_slot_verify (Ops, (CONST CHAR8 *CONST *)RequestedPartition,
SlotSuffix, VerifyFlags, VerityFlags, &SlotData);
}

if (SlotData == NULL) {
Status = EFI_LOAD_ERROR;
Info->BootState = RED; // tj: here
goto out;
}
...
out:
if (Status != EFI_SUCCESS) {
if (SlotData != NULL) {
avb_slot_verify_data_free (SlotData);
}
if (Ops != NULL) {
AvbOpsFree (Ops);
}
if (UserData != NULL) {
avb_free (UserData);
}
if (VBData != NULL) {
avb_free (VBData);
}
Info->BootState = RED;
if (Info->MultiSlotBoot) {
HandleActiveSlotUnbootable ();
/* HandleActiveSlotUnbootable should have swapped slots and
* reboot the device. If no bootable slot found, enter fastboot
*/
DEBUG ((EFI_D_WARN, "No bootable slots found enter fastboot mode\n"));
} else {
DEBUG ((EFI_D_WARN,
"Non Multi-slot: Unbootable entering fastboot mode\n"));
}
}

DEBUG ((EFI_D_INFO, "VB2: boot state: %a(%d)\n", //tj: here
VbSn[Info->BootState].name, Info->BootState));
return Status;
}

avb_slot_verify出来后,因为SlotData == NULL直接Info->BootState = RED了。这里有个HandleActiveSlotUnbootable,来看下:

EFI_STATUS HandleActiveSlotUnbootable (VOID)
{
...
/* Mark current Slot as unbootable */
...
if (FirstBoot && !TargetBuildVariantUser ()) {
DEBUG ((EFI_D_VERBOSE, "FirstBoot, skipping slot Unbootable\n"));
FirstBoot = FALSE;
} else {
BootEntry->PartEntry.Attributes |=
(PART_ATT_UNBOOTABLE_VAL) & (~PART_ATT_SUCCESSFUL_VAL);
UpdatePartitionAttributes (PARTITION_ATTRIBUTES);
}
...

return EFI_LOAD_ERROR;
}

会把当前槽(这里是_a)置为unbootable,我们在fastboot getvar也能看到:

(bootloader) current-slot:a
(bootloader) has-slot:boot:yes
(bootloader) slot-retry-count:b:7
(bootloader) slot-unbootable:b:no
(bootloader) slot-successful:b:no
(bootloader) slot-retry-count:a:5
(bootloader) slot-unbootable:a:yes //tj: here
(bootloader) slot-successful:a:no

这里也能和log对上:先是No bootable slots found enter fastboot mode,然后是VB2: boot state: red。继续check:

EFI_STATUS
LoadImageAndAuth (BootInfo *Info)
{
...
} else {
Slot CurrentSlot = {{0}};

GUARD (FindBootableSlot (&CurrentSlot));
if (IsSuffixEmpty (&CurrentSlot)) {
DEBUG ((EFI_D_ERROR, "No bootable slot\n"));
return EFI_LOAD_ERROR;
}
...
case AVB_1:
Status = LoadImageAndAuthVB1 (Info);
break;
case AVB_2:
Status = LoadImageAndAuthVB2 (Info);
break;
...
if (IsUnlocked () && Status != EFI_SUCCESS) {
DEBUG ((EFI_D_ERROR, "LoadImageAndAuth failed %r\n", Status));
return Status;
}

if (AVBVersion != AVB_LE) {
DisplayVerifiedBootScreen (Info); //tj: here
DEBUG ((EFI_D_VERBOSE, "Sending Milestone Call\n"));
Status = Info->VbIntf->VBSendMilestone (Info->VbIntf);
if (Status != EFI_SUCCESS) {
DEBUG ((EFI_D_ERROR, "Error sending milestone call to TZ\n"));
return Status;
}
}
return Status;

LoadImageAndAuthVB2失败后,如果是unlock状态就直接返回了;我们这里是lock,会继续走到DisplayVerifiedBootScreen:

STATIC EFI_STATUS
DisplayVerifiedBootScreen (BootInfo *Info)
{
...
DEBUG ((EFI_D_VERBOSE, "Boot State is : %d\n", Info->BootState));
switch (Info->BootState) {
case RED:
Status = DisplayVerifiedBootMenu (DISPLAY_MENU_RED);
if (Status != EFI_SUCCESS) {
DEBUG ((EFI_D_INFO,
"Your device is corrupt. It can't be trusted and will not boot."
"\nYour device will shutdown in 30s\n"));
}
MicroSecondDelay (30000000);
ShutdownDevice (); //tj: here
break;

aha…就是这里显示警告界面,并在等待30s后调用ShutdownDevice ()的。DisplayVerifiedBootMenu这里会显示device is corrupt,code就不贴了。ok,接下来(第二次启动)的log就是:

Slot _a is unbootable, trying alternate slot

是在FindBootableSlot:

EFI_STATUS
FindBootableSlot (Slot *BootableSlot)
{
if (Unbootable == 0 && BootSuccess == 1) {
DEBUG (
(EFI_D_VERBOSE, "Active Slot %s is bootable\n", BootableSlot->Suffix));
} else if (Unbootable == 0 && BootSuccess == 0 && RetryCount > 0) {
if ((!IsABRetryCountDisabled () &&
!IsBootDevImage ()) &&
IsABRetryCountUpdateRequired ()) {
RetryCount--;
BootEntry->PartEntry.Attributes &= ~PART_ATT_MAX_RETRY_COUNT_VAL;
BootEntry->PartEntry.Attributes |= RetryCount
<< PART_ATT_MAX_RETRY_CNT_BIT;
UpdatePartitionAttributes (PARTITION_ATTRIBUTES);
DEBUG ((EFI_D_INFO, "Active Slot %s is bootable, retry count %ld\n", //tj: first boot log
BootableSlot->Suffix, RetryCount));
} else {
DEBUG ((EFI_D_INFO, "A/B retry count NOT decremented\n"));
}
} else {
DEBUG ((EFI_D_INFO, "Slot %s is unbootable, trying alternate slot\n", //tj: second boot log
BootableSlot->Suffix));
GUARD_OUT (HandleActiveSlotUnbootable ());
}

因为red了,前面已经把当前槽的Unbootable置为1,所以走了最后的else分支,即GUARD_OUT (HandleActiveSlotUnbootable ())。下面贴的是GUARD的定义(GUARD_OUT的定义这里没贴,应该与之类似,待确认):

/* GUARD(code): evaluate |code| and store its result in the caller's
 * local variable |Status| (the macro assigns to it but does not declare
 * it). If the result is not EFI_SUCCESS, log the current line number,
 * the enclosing function name and the status at EFI_D_ERROR level, then
 * return |Status| from the enclosing function. The do { } while (0)
 * wrapper makes the expansion behave as a single statement.
 */
#define GUARD(code)                                                            \
do { \
Status = (code); \
if (Status != EFI_SUCCESS) { \
DEBUG ((EFI_D_ERROR, "Err: line:%d %a() status: %r\n", __LINE__, \
__FUNCTION__, Status)); \
return Status; \
} \
} while (0)

有错就直接返回了。而从上面贴的代码看,HandleActiveSlotUnbootable只要返回,返回值就是EFI_LOAD_ERROR,所以这里一定会走错误路径。

FindBootableSlot也是在LoadImageAndAuth()里调用的,再贴一下:

} else {
Slot CurrentSlot = {{0}};

GUARD (FindBootableSlot (&CurrentSlot));

GUARD直接返回了。继续往前看:

EFI_STATUS EFIAPI  __attribute__ ( (no_sanitize ("safe-stack")))
LinuxLoaderEntry (IN EFI_HANDLE ImageHandle, IN EFI_SYSTEM_TABLE *SystemTable)
{
...
BootInfo Info = {0};
Info.MultiSlotBoot = MultiSlotBoot;
Info.BootIntoRecovery = BootIntoRecovery;
Info.BootReasonAlarm = BootReasonAlarm;
Status = LoadImageAndAuth (&Info); //tj: here
if (Status != EFI_SUCCESS) {
DEBUG ((EFI_D_ERROR, "LoadImageAndAuth failed: %r\n", Status));
goto fastboot;
}

BootLinux (&Info);
}

yup,LoadImageAndAuth failed,于是goto fastboot,对应log里的Launching fastboot,finished!

So,真正的root cause是:为什么会出现AVB_SLOT_VERIFY_RESULT_ERROR_ROLLBACK_INDEX?下篇分解:)