compact index是EROFS默认使用的压缩布局,内核提交描述如下:

staging: erofs: add compacted ondisk compression indexes

This patch introduces new compacted compression indexes.

In contrast to legacy compression indexes that
each 4k logical cluster has an 8-byte index,
compacted ondisk compression indexes will have
amortized 2 bytes for each 4k logical cluster (compacted 2B)
amortized 4 bytes for each 4k logical cluster (compacted 4B)

In detail, several continuous clusters will be encoded in
a compacted pack with cluster types, offsets, and one blkaddr
at the end of the pack to leave 4-byte margin for better
decoding performance, as illustrated below:
 _____________________________________________
|___@_____ encoded bits __________|_ blkaddr _|
0       .                                     amortized * vcnt
.          .
.             .                   amortized * vcnt - 4
.                .
.___________________.
|_type_|_clusterofs_|

Note that compacted 2 / 4B should be aligned with 32 / 8 bytes
in order to avoid each pack crossing page boundary.

以下分析参考erofs-utils 1.3。

legacy compression的index是8个字节,对应一个4KB的lcluster:

/*
 * Legacy (non-compacted) on-disk compression index: one entry per
 * logical cluster. The fields add up to 8 bytes (2 + 2 + 4).
 */
struct z_erofs_vle_decompressed_index {
/* NOTE(review): presumably carries the cluster type bits - confirm against erofs_fs.h */
__le16 di_advise;
/* where to decompress in the head cluster */
__le16 di_clusterofs;

union {
/* for the head cluster */
__le32 blkaddr;
/*
* for the rest clusters
* e.g. for 4k page-sized cluster, maximum 4K*64k = 256M
* [0] - pointing to the head cluster
* [1] - pointing to the tail cluster
*/
__le16 delta[2];
} di_u;
};

legacy布局下,一个文件所有块index的总大小是indexsize,再加上map header(含padding)就是整个压缩元数据的容量:

/*
 * Size in bytes of the legacy compression metadata for a file of
 * `filesize` bytes: the legacy map header plus one
 * struct z_erofs_vle_decompressed_index per BLK_ROUND_UP(filesize) unit.
 * NOTE(review): assumes BLK_ROUND_UP() yields the block count rounded up - confirm.
 */
static unsigned int vle_compressmeta_capacity(erofs_off_t filesize)
{
const unsigned int indexsize = BLK_ROUND_UP(filesize) *
sizeof(struct z_erofs_vle_decompressed_index);

return Z_EROFS_LEGACY_MAP_HEADER_SIZE + indexsize;
}

Z_EROFS_LEGACY_MAP_HEADER_SIZE:

/*
 * Map header that precedes the compression indexes.
 * Its fields add up to 8 bytes (4 + 2 + 1 + 1).
 */
struct z_erofs_map_header {
__le32 h_reserved1;
__le16 h_advise;
/*
* bit 0-3 : algorithm type of head 1 (logical cluster type 01);
* bit 4-7 : algorithm type of head 2 (logical cluster type 11).
*/
__u8 h_algorithmtype;
/*
* bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
* bit 3-7 : reserved.
*/
__u8 h_clusterbits;
};

/* number of padding bytes counted together with the map header in the legacy layout */
#define Z_EROFS_VLE_LEGACY_HEADER_PADDING 8

/* legacy map header size: struct z_erofs_map_header plus the padding above */
#define Z_EROFS_LEGACY_MAP_HEADER_SIZE \
(sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING)

在写压缩文件时:

u8 *compressmeta = malloc(vle_compressmeta_capacity(inode->i_size));
if (!compressmeta)
return -ENOMEM;

注意到z_erofs_map_header也占用8Bytes。也就是如下布局图:

             +----legacymetasize/extent_isize-------+
             |                                      |
+------------+           index start            index end
|inode|xattrs|                |                     |
+------------|---------+------|-----+-----+----+----|
             | map hdr | pad  | 8B  | 8B  | ...|last|
             |---------+------+-----+-----+----+----|
             |                |                |
       compressmeta     metacur(1st)     metacur(last)
             |                |
             +-LEGACY MAP HDR-+

ok,我们来看compact index,在z_erofs_convert_to_compacted_format():

const unsigned int mpos = Z_EROFS_VLE_EXTENT_ALIGN(inode->inode_isize +
inode->xattr_isize) +
sizeof(struct z_erofs_map_header);

留意到cluster type实际只有3种有效取值(PLAIN、HEAD、NONHEAD),外加1个保留值RESERVED,一共4个取值:

/*
 * Logical cluster types. The four values 0..3 fit in 2 bits;
 * Z_EROFS_VLE_CLUSTER_TYPE_MAX evaluates to 4.
 */
enum {
Z_EROFS_VLE_CLUSTER_TYPE_PLAIN = 0,
Z_EROFS_VLE_CLUSTER_TYPE_HEAD = 1,
Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD = 2,
Z_EROFS_VLE_CLUSTER_TYPE_RESERVED = 3,
Z_EROFS_VLE_CLUSTER_TYPE_MAX
};

用2个bits就可以了:

/* bit width reserved for a cluster type value */
#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS        2

di_clusterofs有效数据只要12bits即可,也就是说type + clusterofs/delta,14bits就可以搞定,也就是compact 2B。

if (destsize == 4) {
vcnt = 2;
} else if (destsize == 2 && logical_clusterbits == 12) {
vcnt = 16;
} else {
return ERR_PTR(-EINVAL);
}
encodebits = (vcnt * destsize * 8 - 32) / vcnt;

对compact 2b,encodebits是(16x2Bx8-32)/16=14 bits;一个pack共16x2B=32字节,需要按32字节对齐。

                            in
        8B align             |
------------|---------+------|-----+-----+----+----|
            | map hdr | pad  | 8B  | 8B  | ...|last|
            |---------|------+-----+-----+----+----|
                      |
                    mpos

因为compact 2b需要32字节对齐,如果mpos没有32字节对齐,就要先用若干个compact 4b的index把位置补齐到32字节边界,于是就有了compacted_4b_initial(注意:mpos已经32字节对齐时下式结果为8,erofs-utils随后会把这种情况修正为0):

compacted_4b_initial = (32 - mpos % 32) / 4;

then,算下2b有多少:

compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);

剩下的就是compacted_4b_end:

compacted_4b_end = totalidx - compacted_4b_initial - compacted_2b;

来看out的生成:

out按bit位置pos依次写入,rem是当前字节out[pos / 8]中已被前面index占用的低位bit数(即pos对8取余)。以第二个index为例:第一个index的14bits占了1个字节外加6bits,所以rem就是6,当前字节只剩高2bits可用;先用ch保留已占用的低6bits,再把v的低2bits拼到它们上面:

ch = out[pos / 8] & ((1 << rem) - 1);
out[pos / 8] = (v << rem) | ch;

v的低2bits(即8 - rem个bit)已经写进当前字节,剩下的高位右移(8 - rem)后放到下一个字节,也就是:

out[pos / 8 + 1] = v >> (8 - rem); 

在此基础上再右移8bits得到下一个,也就是:

out[pos / 8 + 2] = v >> (16 - rem);

Done.