compact index是EROFS默认使用的压缩布局,内核提交描述如下:

    staging: erofs: add compacted ondisk compression indexes

    This patch introduces new compacted compression indexes.

    In contrast to legacy compression indexes that
       each 4k logical cluster has an 8-byte index,
    compacted ondisk compression indexes will have
       amortized 2 bytes for each 4k logical cluster (compacted 2B)
       amortized 4 bytes for each 4k logical cluster (compacted 4B)

    In detail, several continuous clusters will be encoded in
    a compacted pack with cluster types, offsets, and one blkaddr
    at the end of the pack to leave 4-byte margin for better
    decoding performance, as illustrated below:
       _____________________________________________
      |___@_____ encoded bits __________|_ blkaddr _|
      0       .                                     amortized * vcnt
      .          .
      .             .                   amortized * vcnt - 4
      .                .
      .___________________.
      |_type_|_clusterofs_|

    Note that compacted 2 / 4B should be aligned with 32 / 8 bytes
    in order to avoid each pack crossing page boundary.

以下分析参考erofs-utils 1.3。

legacy compression的index是8个字节,对应一个4KB的lcluster:

/*
 * Legacy (non-compacted) on-disk compression index.
 * One entry is 8 bytes (2 + 2 + 4) and describes a single logical cluster.
 */
struct z_erofs_vle_decompressed_index {
    /* NOTE(review): presumably carries the cluster type bits -- confirm */
    __le16 di_advise;
    /* where to decompress in the head cluster */
    __le16 di_clusterofs;

    /* interpretation depends on whether this entry is a head cluster or not */
    union {
        /* for the head cluster */
        __le32 blkaddr;
        /*
         * for the rest clusters
         * (e.g. for 4k page-sized cluster, maximum 4K*64k = 256M)
         * [0] - pointing to the head cluster
         * [1] - pointing to the tail cluster
         */
        __le16 delta[2];
    } di_u;
};

一个文件全部块index所占的大小(indexsize)按如下方式计算,再加上map header区域就是压缩元数据的总容量:

/*
 * Size in bytes of the buffer needed for a file's legacy compression
 * metadata: the legacy map header area followed by the index entries.
 *
 * @filesize: size of the file in bytes
 *
 * Returns the header size plus the size of all index entries.
 */
static unsigned int vle_compressmeta_capacity(erofs_off_t filesize)
{
    /*
     * One fixed-size legacy index entry for each unit counted by
     * BLK_ROUND_UP(filesize) (presumably the file's block count, rounded up).
     */
    const unsigned int entries_bytes =
        sizeof(struct z_erofs_vle_decompressed_index) * BLK_ROUND_UP(filesize);

    return entries_bytes + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
}

Z_EROFS_LEGACY_MAP_HEADER_SIZE:

/*
 * Compression map header stored in front of the compression indexes.
 * Its four fields add up to 8 bytes (4 + 2 + 1 + 1).
 */
struct z_erofs_map_header {
    /* reserved field */
    __le32    h_reserved1;
    /* NOTE(review): advise flags; their meaning is not visible here -- confirm */
    __le16    h_advise;
    /*
     * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
     * bit 4-7 : algorithm type of head 2 (logical cluster type 11).
     */
    __u8    h_algorithmtype;
    /*
     * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
     * bit 3-7 : reserved.
     */
    __u8    h_clusterbits;
};

/* bytes of padding placed after the map header in the legacy layout */
#define Z_EROFS_VLE_LEGACY_HEADER_PADDING       8

/* map header plus its padding: the space in front of the first legacy index */
#define Z_EROFS_LEGACY_MAP_HEADER_SIZE    \
    (sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING)

在写压缩文件时:

    u8 *compressmeta = malloc(vle_compressmeta_capacity(inode->i_size));
    if (!compressmeta)
        return -ENOMEM;

注意到z_erofs_map_header也占用8Bytes。也就是如下布局图:

             +----legacymetasize/extent_isize-------+
             |                                      |
+------------+            index start         index end
|inode|xattrs|                |                     |
+------------|---------+------|-----+-----+----+----|
             | map hdr | pad  | 8B  | 8B  | ...|last|
             |---------+------+-----+-----+----+----|
             |                |                     |
       compressmeta       metacur(1st)         metacur(last)
             |                |
             +-LEGACY MAP HDR-+                    

ok,我们来看compact index,在z_erofs_convert_to_compacted_format():

    const unsigned int mpos = Z_EROFS_VLE_EXTENT_ALIGN(inode->inode_isize +
                               inode->xattr_isize) +
                  sizeof(struct z_erofs_map_header);

留意到有效的cluster type只有3种(PLAIN、HEAD、NONHEAD),另外还有一个保留值RESERVED:

/*
 * Logical cluster types. The explicit values 0..3 all fit in a
 * 2-bit field (see Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS).
 */
enum {
    Z_EROFS_VLE_CLUSTER_TYPE_PLAIN        = 0,
    Z_EROFS_VLE_CLUSTER_TYPE_HEAD        = 1,
    Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD    = 2,
    /* reserved value, not one of the three usable types */
    Z_EROFS_VLE_CLUSTER_TYPE_RESERVED    = 3,
    /* implicit value 4: the number of enumerators above */
    Z_EROFS_VLE_CLUSTER_TYPE_MAX
};

用2个bits就可以了:

#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS        2

di_clusterofs有效数据只要12bits即可,也就是说type + clusterofs/delta,14bits就可以搞定;再加上pack末尾4字节blkaddr均摊到16个lcluster上的2bits(32/16),正好是16bits,也就是compact 2B。

        /* pick how many index entries (vcnt) are packed together */
        if (destsize == 4) {
                /* compacted 4B: 2 entries * 4 bytes = one 8-byte pack */
                vcnt = 2;
        } else if (destsize == 2 && logical_clusterbits == 12) {
                /* compacted 2B: 16 entries * 2 bytes = one 32-byte pack */
                vcnt = 16;
        } else {
                /* every other destsize / cluster-bits combination is rejected */
                return ERR_PTR(-EINVAL);
        }
        /*
         * Bits left per entry once 32 bits of the pack are set aside
         * (the 4-byte blkaddr at the end of the pack, per the commit
         * description): 16 for compacted 4B, 14 for compacted 2B.
         */
        encodebits = (vcnt * destsize * 8 - 32) / vcnt;

对compact 2B,encodebits是(16×2B×8-32)/16=14 bits;一个pack为16×2B=32字节,因此需要按32字节对齐。

                            in
         8B align            |
------------|---------+------|-----+-----+----+----|
            | map hdr | pad  | 8B  | 8B  | ...|last|
            |---------|------+-----+-----+----+----|
                      |
                      mpos

因为compact 2B需要32字节对齐,如果mpos没有按32字节对齐,就要先用若干个compact 4B的index把位置补齐,于是就有了compacted_4b_initial(注意:当mpos已经32字节对齐时,下面的表达式结果是8而不是0,erofs-utils源码中紧接着会把这种情况修正为0):

compacted_4b_initial = (32 - mpos % 32) / 4;

then,算下2b有多少:

compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);

剩下的就是compacted_4b_end:

compacted_4b_end = totalidx - compacted_4b_initial - compacted_2b;

来看out的生成:

以pack中第2个index为例:前一个index的14bits占满了1个字节,外加下一个字节的低6bits,所以rem(当前字节里已被占用的bit数)就是6;该字节剩下的高2bits由当前值v的低2bits填充,并与已有的低6bits(ch)拼接:

ch = out[pos / 8] & ((1 << rem) - 1);
out[pos / 8] = (v << rem) | ch;

因为2bits被用了,那剩下的放到下一个字节,也就是:

out[pos / 8 + 1] = v >> (8 - rem); 

在此基础上再右移8bits得到下一个,也就是:

out[pos / 8 + 2] = v >> (16 - rem);

Done.