最近遇到一个kmalloc内存分配失败的问题:手机刚刷完机后第一次开机时有很大概率出现,重启之后就正常了。环境是Android 7.0 / ARM64 / kernel 3.18。

01-09 04:55:23.878     0     0 W Thread-2: page allocation failure: order:4, mode:0xc0d0

order-4就是2^4=16个物理连续的page了(按4K页大小算是64KB)。

call stack

01-12 08:34:21.699     0     0 I       : [<ffffffc00008a2cc>] show_stack+0x20/0x28
01-12 08:34:21.699 0 0 I : [<ffffffc001150420>] dump_stack+0x80/0xa4
01-12 08:34:21.699 0 0 I : [<ffffffc000182f98>] warn_alloc_failed+0x138/0x168
01-12 08:34:21.699 0 0 I : [<ffffffc0001868fc>] __alloc_pages_nodemask+0x72c/0x994
01-12 08:34:21.699 0 0 I : [<ffffffc000186cc4>] alloc_kmem_pages+0x34/0x40
01-12 08:34:21.699 0 0 I : [<ffffffc0001a10c4>] kmalloc_order+0x40/0xb8
01-12 08:34:21.699 0 0 I : [<ffffffc0001a1178>] kmalloc_order_trace+0x3c/0x108

从call stack看,warn_alloc_failed是在__alloc_pages_nodemask的slowpath里被调用的,说明fast path已经失败了,当时的水位应该比较低。

看下kmalloc:

/*
 * Handle a request that is too large for the kmalloc slab caches: turn the
 * byte count into a page order with get_order() and pass it on to
 * kmalloc_order_trace().
 */
static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
{
	return kmalloc_order_trace(size, flags, get_order(size));
}

/*
 * Fallback that kmalloc() ends up calling when none of its inline cases
 * return first.
 *
 * Requests larger than KMALLOC_MAX_CACHE_SIZE are forwarded to
 * kmalloc_large(); for anything else a cache is looked up with
 * kmalloc_slab(). The remainder of the body is elided ("...") in this
 * excerpt.
 */
void *__kmalloc(size_t size, gfp_t flags)
{
struct kmem_cache *s;
void *ret;

/* Too big for a kmalloc slab cache: hand over to kmalloc_large(). */
if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
return kmalloc_large(size, flags);

/* Otherwise use the cache kmalloc_slab() selects for this size/flags. */
s = kmalloc_slab(size, flags);
...
}

/*
 * kmalloc - inline front end that chooses an allocation path
 * @size: number of bytes requested
 * @flags: gfp flags for the allocation
 *
 * When @size is a compile-time constant the path is picked right here:
 * sizes above KMALLOC_MAX_CACHE_SIZE go to kmalloc_large(), and, unless
 * CONFIG_SLOB is defined, requests without GFP_DMA are served from the
 * kmalloc cache chosen by kmalloc_index() (index 0 yields ZERO_SIZE_PTR).
 * Every other case ends in __kmalloc().
 */
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
	/* Size is not a compile-time constant: defer to __kmalloc(). */
	if (!__builtin_constant_p(size))
		return __kmalloc(size, flags);

	if (size > KMALLOC_MAX_CACHE_SIZE)
		return kmalloc_large(size, flags); /* got it */

#ifndef CONFIG_SLOB
	if (!(flags & GFP_DMA)) {
		int cache_idx = kmalloc_index(size);

		if (cache_idx)
			return kmem_cache_alloc_trace(kmalloc_caches[cache_idx],
						      flags, size);

		/* Index 0 gets the ZERO_SIZE_PTR marker instead of an object. */
		return ZERO_SIZE_PTR;
	}
#endif
	return __kmalloc(size, flags);
}

看下KMALLOC_MAX_CACHE_SIZE,现在手机内核配置基本都是slub。

/*
 * Maximum size for which we actually use a slab cache.
 * kmalloc() and __kmalloc() pass any request strictly larger than this
 * to kmalloc_large() instead.
 */
#define KMALLOC_MAX_CACHE_SIZE (1UL << KMALLOC_SHIFT_HIGH)


#ifdef CONFIG_SLUB
/*
* SLUB directly allocates requests fitting in to an order-1 page
* (PAGE_SIZE*2). Larger requests are passed to the page allocator.
*
* NOTE: only this part of the CONFIG_SLUB conditional is quoted; the
* closing #endif (and any other branches) lies outside the excerpt.
*/
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)

也就是说,slub认为超过2个page(order-1)的请求就算大块内存,直接走page allocator。

arm64:

/*
 * PAGE_SHIFT determines the page size: PAGE_SIZE is 1 shifted left by
 * PAGE_SHIFT. With CONFIG_ARM64_64K_PAGES the shift is 16 (64 KiB pages),
 * otherwise it is 12 (4 KiB pages).
 */
#ifdef CONFIG_ARM64_64K_PAGES
#define PAGE_SHIFT 16
#else
#define PAGE_SHIFT 12
#endif
/* NOTE(review): _AC(1,UL) presumably yields the constant 1 typed as unsigned long - confirm against the _AC definition. */
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)

arm64/4k, so PAGE_SHIFT is 12

这里分配大小是order-4,看下get_order:

/**
* get_order - Determine the allocation order of a memory size
* @size: The size for which to get the order
*
* Determine the allocation order of a particular sized block of memory. This
* is on a logarithmic scale, where:
*
* 0 -> 2^0 * PAGE_SIZE and below
* 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
* 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
* 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
* 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
* ...
*
* The order returned is used to find the smallest allocation granule required
* to hold an object of the specified size.
*
* The result is undefined if the size is 0.
*
* This function may be used to initialise variables with compile time
* evaluations of constants.
*
* How the macro picks its result:
*  - if the argument is a compile-time constant (__builtin_constant_p):
*      - 0 gives BITS_PER_LONG - PAGE_SHIFT;
*      - anything below 1UL << PAGE_SHIFT gives 0;
*      - otherwise ilog2(n - 1) - PAGE_SHIFT + 1;
*  - any other argument is passed to __get_order().
*
* The argument is written out several times in the expansion, so it should
* be an expression without side effects.
*/
#define get_order(n) \
( \
__builtin_constant_p(n) ? ( \
((n) == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
(((n) < (1UL << PAGE_SHIFT)) ? 0 : \
ilog2((n) - 1) - PAGE_SHIFT + 1) \
) : \
__get_order(n) \
)

#endif /* __ASSEMBLY__ */

/*
 * Runtime evaluation of get_order().
 *
 * Subtracts one from @size, shifts the result right by PAGE_SHIFT and
 * returns fls() of that value (fls64() when BITS_PER_LONG is not 32).
 */
static inline __attribute_const__
int __get_order(unsigned long size)
{
	unsigned long shifted = (size - 1) >> PAGE_SHIFT;

#if BITS_PER_LONG == 32
	return fls(shifted);
#else
	return fls64(shifted);
#endif
}

/*
 * BITS_PER_LONG is 64 when CONFIG_64BIT is defined and 32 otherwise.
 * get_order() uses it for the size == 0 case and __get_order() uses it to
 * choose between fls() and fls64().
 */
#ifdef CONFIG_64BIT
#define BITS_PER_LONG 64
#else
#define BITS_PER_LONG 32
#endif /* CONFIG_64BIT */

基本就是按照注释来的。

在slowpath最后有should_alloc_retry, 看下

/*
* PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
* costly to service. That is between allocation orders which should
* coalesce naturally under reasonable reclaim pressure and those which
* will not.
*
* should_alloc_retry() compares the requested order against this value:
* orders up to and including it take the unconditional retry branch, while
* larger orders are retried only when __GFP_REPEAT is set.
*/
#define PAGE_ALLOC_COSTLY_ORDER 3

/*
 * Decide whether a failed page allocation attempt should be retried.
 *
 * @gfp_mask:          allocation flags supplied by the caller
 * @order:             order of the allocation being attempted
 * @did_some_progress: non-zero if reclaim made progress
 * @pages_reclaimed:   pages reclaimed so far for this allocation
 *
 * Returns 1 to retry and 0 to give up.
 */
static inline int
should_alloc_retry(gfp_t gfp_mask, unsigned int order,
		   unsigned long did_some_progress,
		   unsigned long pages_reclaimed)
{
	/* The caller explicitly asked not to loop. */
	if (gfp_mask & __GFP_NORETRY)
		return 0;

	/* The caller explicitly asked for the allocation never to fail. */
	if (gfp_mask & __GFP_NOFAIL)
		return 1;

	/*
	 * While suspending, GFP_KERNEL is reduced to __GFP_WAIT, so reclaim
	 * may make no forward progress and OOM is not invoked; storage
	 * devices are disabled too, so kswapd cannot help. Give up in that
	 * situation. pm_suspended_storage() is consulted only when no
	 * progress was made.
	 */
	if (!did_some_progress) {
		if (pm_suspended_storage())
			return 0;
	}

	if (order > PAGE_ALLOC_COSTLY_ORDER) {
		/*
		 * Costly orders are retried only with __GFP_REPEAT, and only
		 * while fewer than 1 << order pages have been reclaimed;
		 * beyond that point the allocation is allowed to fail.
		 */
		return (gfp_mask & __GFP_REPEAT) &&
		       pages_reclaimed < (1 << order);
	}

	/*
	 * In this implementation a non-costly order behaves like
	 * __GFP_NOFAIL; other implementations may differ.
	 */
	return 1;
}

可以看出,如果是order-3以内(order <= PAGE_ALLOC_COSTLY_ORDER),只要没有带__GFP_NORETRY,也不是在suspend期间回收毫无进展,就一定会一直retry,直到分配成功。

我们这里是order-4,已经算high order了。为了解决分配不到的问题,先试了下__GFP_REPEAT,还是不行,看来碎片比较严重;要想一直回收重试直到成功,那就只能用__GFP_NOFAIL了。试了多次没发现什么阻塞,功能正常,不过这个标志并不推荐使用。