问题

Android 10.0 + QCOM平台 + uefi,刷完机屏幕提示设备损坏然后进入了fastboot,屏幕提示如下:

Your device is corrupt. It can't be trusted and will not boot...

一上来会认为刷机有问题?有时候,你的眼睛背叛了你的心。

分析

首先,获取串口log,主要部分:

Active Slot _a is bootable, retry count 5
Booting from slot (_a)
Booting Into Mission Mode
Load Image vbmeta_a total time: 2 ms
Load Image vbmeta_system_a total time: 1 ms
avb_slot_verify.c:657: ERROR: vbmeta_system_a: Image rollback index is less than the stored rollback index.
Load Image boot_a total time: 43 ms
Load Image dtbo_a total time: 4 ms
No bootable slots found enter fastboot mode
VB2: boot state: red(3)
//重启
...
Slot _a is unbootable, trying alternate slot
Err: line:1603 FindBootableSlot() status: Load Error
Err: line:1386 LoadImageAndAuth() status: Load Error
LoadImageAndAuth failed: Load Error
Launching fastboot

有两个问题,一是device corrupt,一是进了fastboot。ok,我们先看avb的一个错误:

avb_slot_verify.c:657: ERROR: vbmeta_system_a: Image rollback index is less than the stored rollback index.

一开始没留意到这个错误,因为正常启动时也会有其他avb错误打印(印象中是有的),所以忽略了这个关键问题。这也提醒我们:该用error level的才打error,该用warning的就打warning,不要乱打,尤其是release版本,否则会误导debug。言归正传,看代码:

static AvbSlotVerifyResult load_and_verify_vbmeta(
    AvbOps* ops,
    const char* const* requested_partitions,
    const char* ab_suffix,
    bool allow_verification_error,
    AvbVBMetaImageFlags toplevel_vbmeta_flags,
    int rollback_index_location,
    const char* partition_name,
    size_t partition_name_len,
    const uint8_t* expected_public_key,
    size_t expected_public_key_length,
    AvbSlotVerifyData* slot_data,
    AvbAlgorithmType* out_algorithm_type) {
  ...
  if (vbmeta_header.rollback_index < stored_rollback_index) {
    avb_errorv(
        full_partition_name,
        ": Image rollback index is less than the stored rollback index.\n", //tj: here
        NULL);
    ret = AVB_SLOT_VERIFY_RESULT_ERROR_ROLLBACK_INDEX;
    if (!allow_verification_error) {
      goto out;
    }
  }
  ...
out:
  /* If |vbmeta_image_data| isn't NULL it means that it adopted
   * |vbmeta_buf| so in that case don't free it here.
   */
  if (vbmeta_image_data == NULL) {
    if (vbmeta_buf != NULL) {
      avb_free(vbmeta_buf);
    }
  }
  if (descriptors != NULL) {
    avb_free(descriptors);
  }
  return ret;
}

这里的allow_verification_error对应设备是否unlock:unlock时为true,允许带着校验错误继续;我们是lock状态,所以直接goto out了,此时ret = AVB_SLOT_VERIFY_RESULT_ERROR_ROLLBACK_INDEX。为什么rollback index会不匹配涉及到avb secure相关,先略过。

继续看:

AvbSlotVerifyResult avb_slot_verify(AvbOps* ops,
  ...
  ret = load_and_verify_vbmeta(ops,
                               requested_partitions,
                               ab_suffix,
                               allow_verification_error,
                               0 /* toplevel_vbmeta_flags */,
                               0 /* rollback_index_location */,
                               "vbmeta",
                               avb_strlen("vbmeta"),
                               NULL /* expected_public_key */,
                               0 /* expected_public_key_length */,
                               slot_data,
                               &algorithm_type);
  if ((!allow_verification_error && ret != AVB_SLOT_VERIFY_RESULT_OK) ||
      !result_should_continue(ret)) {
    goto fail;
  }
  ...
fail:
  if (slot_data != NULL) {
    avb_slot_verify_data_free(slot_data);
  }
  return ret;

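由于是lock状态且ret不是AVB_SLOT_VERIFY_RESULT_OK,这里会goto fail;fail分支会把slot_data释放掉并返回错误,所以调用者拿到的SlotData仍然是NULL。另外,当前配置是avb 2.0,所以调用者是下面的LoadImageAndAuthVB2: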

STATIC EFI_STATUS
LoadImageAndAuthVB2 (BootInfo *Info)
{
  ...
    Result = avb_slot_verify (Ops, (CONST CHAR8 *CONST *)RequestedPartition,
                SlotSuffix, VerifyFlags, VerityFlags, &SlotData);
  }

  if (SlotData == NULL) {
    Status = EFI_LOAD_ERROR;
    Info->BootState = RED;  // tj: here
    goto out;
  }
  ...
out:
  if (Status != EFI_SUCCESS) {
    if (SlotData != NULL) {
      avb_slot_verify_data_free (SlotData);
    }
    if (Ops != NULL) {
      AvbOpsFree (Ops);
    }
    if (UserData != NULL) {
      avb_free (UserData);
    }
    if (VBData != NULL) {
      avb_free (VBData);
    }
    Info->BootState = RED;
    if (Info->MultiSlotBoot) {
      HandleActiveSlotUnbootable ();
      /* HandleActiveSlotUnbootable should have swapped slots and
       * reboot the device. If no bootable slot found, enter fastboot
       */
      DEBUG ((EFI_D_WARN, "No bootable slots found enter fastboot mode\n"));
    } else {
       DEBUG ((EFI_D_WARN,
           "Non Multi-slot: Unbootable entering fastboot mode\n"));
    }
  }

  DEBUG ((EFI_D_INFO, "VB2: boot state: %a(%d)\n",  //tj: here
        VbSn[Info->BootState].name, Info->BootState)); 
  return Status;
}

avb_slot_verify出来后,因为SlotData == NULL直接Info->BootState = RED了。这里有个HandleActiveSlotUnbootable,来看下:

EFI_STATUS HandleActiveSlotUnbootable (VOID)
{
  ...
  /* Mark current Slot as unbootable */
  ...
  if (FirstBoot && !TargetBuildVariantUser ()) {
    DEBUG ((EFI_D_VERBOSE, "FirstBoot, skipping slot Unbootable\n"));
    FirstBoot = FALSE;
  } else {
    BootEntry->PartEntry.Attributes |=
        (PART_ATT_UNBOOTABLE_VAL) & (~PART_ATT_SUCCESSFUL_VAL);
    UpdatePartitionAttributes (PARTITION_ATTRIBUTES);
  }
  ...

  return EFI_LOAD_ERROR;
}

会把当前槽(这里是_a)置为unbootable,我们在fastboot getvar也能看到:

(bootloader) current-slot:a
(bootloader) has-slot:boot:yes
(bootloader) slot-retry-count:b:7
(bootloader) slot-unbootable:b:no
(bootloader) slot-successful:b:no
(bootloader) slot-retry-count:a:5
(bootloader) slot-unbootable:a:yes //tj: here
(bootloader) slot-successful:a:no

这里也有log: No bootable slots found enter fastboot mode, then VB2: boot state: red。继续check:

EFI_STATUS
LoadImageAndAuth (BootInfo *Info)
{
  ...
  } else {
    Slot CurrentSlot = {{0}};

    GUARD (FindBootableSlot (&CurrentSlot));
    if (IsSuffixEmpty (&CurrentSlot)) {
      DEBUG ((EFI_D_ERROR, "No bootable slot\n"));
      return EFI_LOAD_ERROR;
    }
  ...
  case AVB_1:
    Status = LoadImageAndAuthVB1 (Info);
    break;
  case AVB_2:
    Status = LoadImageAndAuthVB2 (Info);
    break;
  ...
  if (IsUnlocked () && Status != EFI_SUCCESS) {
    DEBUG ((EFI_D_ERROR, "LoadImageAndAuth failed %r\n", Status));
    return Status;
  }

  if (AVBVersion != AVB_LE) {
    DisplayVerifiedBootScreen (Info);  //tj: here
    DEBUG ((EFI_D_VERBOSE, "Sending Milestone Call\n"));
    Status = Info->VbIntf->VBSendMilestone (Info->VbIntf);
    if (Status != EFI_SUCCESS) {
      DEBUG ((EFI_D_ERROR, "Error sending milestone call to TZ\n"));
      return Status;
    }
  }
  return Status;

LoadImageAndAuthVB2失败后,如果是unlock状态就直接返回了;我们这里是lock状态,会继续走到DisplayVerifiedBootScreen:

STATIC EFI_STATUS
DisplayVerifiedBootScreen (BootInfo *Info)
{
  ...
  DEBUG ((EFI_D_VERBOSE, "Boot State is : %d\n", Info->BootState));
  switch (Info->BootState) {
  case RED:
    Status = DisplayVerifiedBootMenu (DISPLAY_MENU_RED);
    if (Status != EFI_SUCCESS) {
      DEBUG ((EFI_D_INFO,
              "Your device is corrupt. It can't be trusted and will not boot."
              "\nYour device will shutdown in 30s\n"));
    }
    MicroSecondDelay (30000000);
    ShutdownDevice (); //tj: here
    break;

aha...屏幕提示就是在这里显示的:DisplayVerifiedBootMenu会显示device is corrupt(code就不贴了),然后延时30s并调用ShutdownDevice(),对应串口log里的"//重启"。ok,重启之后接下来的log就是:

Slot _a is unbootable, trying alternate slot

是在FindBootableSlot:

EFI_STATUS
FindBootableSlot (Slot *BootableSlot)
{
  if (Unbootable == 0 && BootSuccess == 1) {
    DEBUG (
        (EFI_D_VERBOSE, "Active Slot %s is bootable\n", BootableSlot->Suffix));
  } else if (Unbootable == 0 && BootSuccess == 0 && RetryCount > 0) {
    if ((!IsABRetryCountDisabled () &&
        !IsBootDevImage ()) &&
      IsABRetryCountUpdateRequired ()) {
      RetryCount--;
      BootEntry->PartEntry.Attributes &= ~PART_ATT_MAX_RETRY_COUNT_VAL;
      BootEntry->PartEntry.Attributes |= RetryCount
                                         << PART_ATT_MAX_RETRY_CNT_BIT;
      UpdatePartitionAttributes (PARTITION_ATTRIBUTES);
      DEBUG ((EFI_D_INFO, "Active Slot %s is bootable, retry count %ld\n", //tj: first boot log
              BootableSlot->Suffix, RetryCount));
    } else {
      DEBUG ((EFI_D_INFO, "A/B retry count NOT decremented\n"));
    }
  } else {
    DEBUG ((EFI_D_INFO, "Slot %s is unbootable, trying alternate slot\n", //tj: second boot log
            BootableSlot->Suffix));
    GUARD_OUT (HandleActiveSlotUnbootable ());
  }

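因为上一次启动已经red了,并且HandleActiveSlotUnbootable已经把当前槽的Unbootable置为1,所以这次走了else分支里的GUARD_OUT (HandleActiveSlotUnbootable ())。下面贴的是GUARD的定义;从串口log里两条Err打印的格式一致来看,GUARD_OUT的出错处理与之类似: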

/* Evaluate |code| and store the result in the variable `Status`, which the
 * macro assigns to and therefore must already be declared in the enclosing
 * function. If the result is not EFI_SUCCESS, log the source line, the
 * enclosing function name and the status at EFI_D_ERROR level, then return
 * that status from the enclosing function. On EFI_SUCCESS execution simply
 * continues after the macro.
 *
 * The body is wrapped in do { ... } while (0) so the macro expands to a
 * single statement.
 */
#define GUARD(code)                                                            \
  do {                                                                         \
    Status = (code);                                                           \
    if (Status != EFI_SUCCESS) {                                               \
      DEBUG ((EFI_D_ERROR, "Err: line:%d %a() status: %r\n", __LINE__,         \
              __FUNCTION__, Status));                                          \
      return Status;                                                           \
    }                                                                          \
  } while (0)

也就是说,只要返回值不是EFI_SUCCESS就打印错误并直接退出。而从上面贴的代码看,HandleActiveSlotUnbootable最后固定返回EFI_LOAD_ERROR,所以这里必然走出错分支,对应log里的"FindBootableSlot() status: Load Error"。

FindBootableSlot也在LoadImageAndAuth(),再贴下:

  } else {
    Slot CurrentSlot = {{0}};

    GUARD (FindBootableSlot (&CurrentSlot));

GUARD直接返回了。继续往前看:

EFI_STATUS EFIAPI  __attribute__ ( (no_sanitize ("safe-stack")))
LinuxLoaderEntry (IN EFI_HANDLE ImageHandle, IN EFI_SYSTEM_TABLE *SystemTable)
{
    ...
    BootInfo Info = {0};
    Info.MultiSlotBoot = MultiSlotBoot;
    Info.BootIntoRecovery = BootIntoRecovery;
    Info.BootReasonAlarm = BootReasonAlarm;
    Status = LoadImageAndAuth (&Info); //tj: here
    if (Status != EFI_SUCCESS) {
      DEBUG ((EFI_D_ERROR, "LoadImageAndAuth failed: %r\n", Status));
      goto fastboot;
    }

    BootLinux (&Info);
  }

yup,LoadImageAndAuth failed, finished!

So, root cause is why AVB_SLOT_VERIFY_RESULT_ERROR_ROLLBACK_INDEX happened,下篇分解:)