最近遇到一个 kmalloc 内存分配失败的问题:刚刷完机第一次开机时有很大概率出现,重启后就好了。环境是 Android 7.0 / ARM64 / kernel 3.18。

01-09 04:55:23.878     0     0 W Thread-2: page allocation failure: order:4, mode:0xc0d0

order-4 即 2^4 = 16 个连续的 page。

call stack

01-12 08:34:21.699     0     0 I       : [<ffffffc00008a2cc>] show_stack+0x20/0x28
01-12 08:34:21.699     0     0 I       : [<ffffffc001150420>] dump_stack+0x80/0xa4
01-12 08:34:21.699     0     0 I       : [<ffffffc000182f98>] warn_alloc_failed+0x138/0x168
01-12 08:34:21.699     0     0 I       : [<ffffffc0001868fc>] __alloc_pages_nodemask+0x72c/0x994
01-12 08:34:21.699     0     0 I       : [<ffffffc000186cc4>] alloc_kmem_pages+0x34/0x40
01-12 08:34:21.699     0     0 I       : [<ffffffc0001a10c4>] kmalloc_order+0x40/0xb8
01-12 08:34:21.699     0     0 I       : [<ffffffc0001a1178>] kmalloc_order_trace+0x3c/0x108

warn_alloc_failed 是在 slowpath 里调用的,说明 fast path 已经失败了,空闲内存水位应该比较低。

看下kmalloc:

/*
 * Large-allocation helper: turn the byte count into a page order with
 * get_order() and hand the request to kmalloc_order_trace().
 */
static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
{
        return kmalloc_order_trace(size, flags, (unsigned int)get_order(size));
}

/*
 * Non-inline kmalloc path (the body is abridged in this excerpt).
 *
 * A size above KMALLOC_MAX_CACHE_SIZE is handed to kmalloc_large();
 * otherwise kmalloc_slab() is asked for the cache matching size/flags.
 * Everything after that lookup is elided ("...") here.
 */
void *__kmalloc(size_t size, gfp_t flags)
{
    struct kmem_cache *s;
    void *ret;

    if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
        return kmalloc_large(size, flags);

    s = kmalloc_slab(size, flags);
    ...
}

/*
 * kmalloc - inline front end of the allocator.
 *
 * The decision is made inline only when @size is a compile-time constant:
 *   - size > KMALLOC_MAX_CACHE_SIZE is forwarded to kmalloc_large();
 *   - otherwise, unless CONFIG_SLOB is defined, a request without GFP_DMA
 *     is served from kmalloc_caches[kmalloc_index(size)], or returns
 *     ZERO_SIZE_PTR when that index is 0.
 * Every other case falls back to __kmalloc().
 */
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
        /* Sizes not known at compile time are resolved by __kmalloc(). */
        if (!__builtin_constant_p(size))
                return __kmalloc(size, flags);

        /* Branch the author marked ("got it") as the one of interest. */
        if (size > KMALLOC_MAX_CACHE_SIZE)
                return kmalloc_large(size, flags);

#ifndef CONFIG_SLOB
        if (!(flags & GFP_DMA)) {
                int cache_idx = kmalloc_index(size);

                if (cache_idx)
                        return kmem_cache_alloc_trace(kmalloc_caches[cache_idx],
                                        flags, size);

                return ZERO_SIZE_PTR;
        }
#endif
        return __kmalloc(size, flags);
}

看下KMALLOC_MAX_CACHE_SIZE,现在手机内核配置基本都是slub。

/*
 * Maximum size for which we actually use a slab cache:
 * 2^KMALLOC_SHIFT_HIGH bytes.
 */
#define KMALLOC_MAX_CACHE_SIZE  (1UL << KMALLOC_SHIFT_HIGH)


#ifdef CONFIG_SLUB
/*
 * SLUB directly allocates requests fitting in to an order-1 page
 * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
 * The highest kmalloc cache shift is therefore PAGE_SHIFT + 1.
 */
#define KMALLOC_SHIFT_HIGH      (PAGE_SHIFT + 1)

SLUB 认为超过 2 个 page(即大于 order-1)的请求就算大块内存,直接走 page allocator。

arm64:

/*
 * PAGE_SHIFT determines the page size: PAGE_SIZE is 1 << PAGE_SHIFT, i.e.
 * 2^16 bytes (64 KiB) when CONFIG_ARM64_64K_PAGES is defined and
 * 2^12 bytes (4 KiB) otherwise.
 */
#ifdef CONFIG_ARM64_64K_PAGES
#define PAGE_SHIFT              16
#else
#define PAGE_SHIFT              12
#endif
#define PAGE_SIZE               (_AC(1,UL) << PAGE_SHIFT)

arm64/4k, so PAGE_SHIFT is 12

这里分配大小是order-4,看下get_order:

/**
 * get_order - Determine the allocation order of a memory size
 * @size: The size for which to get the order
 *
 * Determine the allocation order of a particular sized block of memory.  This
 * is on a logarithmic scale, where:
 *
 *      0 -> 2^0 * PAGE_SIZE and below
 *      1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
 *      2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
 *      3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
 *      4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
 *      ...
 *
 * The order returned is used to find the smallest allocation granule required
 * to hold an object of the specified size.
 *
 * The result is undefined if the size is 0.
 *
 * This function may be used to initialise variables with compile time
 * evaluations of constants.
 *
 * When the argument is a compile-time constant the order is computed by a
 * constant expression: a constant 0 evaluates to BITS_PER_LONG - PAGE_SHIFT,
 * anything below 1UL << PAGE_SHIFT evaluates to 0, and larger values use
 * ilog2((n) - 1) - PAGE_SHIFT + 1.  A non-constant argument is passed to
 * __get_order() instead.
 */
#define get_order(n)                                            \
(                                                               \
        __builtin_constant_p(n) ? (                             \
                ((n) == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :     \
                (((n) < (1UL << PAGE_SHIFT)) ? 0 :              \
                 ilog2((n) - 1) - PAGE_SHIFT + 1)               \
        ) :                                                     \
        __get_order(n)                                          \
)

#endif  /* __ASSEMBLY__ */

/*
 * Runtime evaluation of get_order().
 *
 * (size - 1) >> PAGE_SHIFT is the zero-based index of the last page the
 * object occupies; the order returned is fls() of that index on 32-bit
 * longs and fls64() of it otherwise.  The subtraction is unsigned, so a
 * size of 0 wraps around rather than going negative.
 */
static inline __attribute_const__
int __get_order(unsigned long size)
{
        unsigned long last_page = (size - 1) >> PAGE_SHIFT;

#if BITS_PER_LONG == 32
        return fls(last_page);
#else
        return fls64(last_page);
#endif
}

/* BITS_PER_LONG is 64 when CONFIG_64BIT is defined and 32 otherwise. */
#ifdef CONFIG_64BIT
#define BITS_PER_LONG 64
#else
#define BITS_PER_LONG 32
#endif /* CONFIG_64BIT */

基本就是按照注释来的。

在slowpath最后有should_alloc_retry, 看下

/*
 * PAGE_ALLOC_COSTLY_ORDER is the order at which allocations are deemed
 * costly to service.  That is between allocation orders which should
 * coalesce naturally under reasonable reclaim pressure and those which
 * will not.
 *
 * With the value 3, requests of up to 2^3 = 8 contiguous pages are not
 * considered costly; anything of a higher order is.
 */
#define PAGE_ALLOC_COSTLY_ORDER 3

/*
 * Tell the allocation slow path whether another attempt is worthwhile.
 *
 * Returns 1 to retry and 0 to give up.  The checks are applied in this
 * order, first match wins:
 *   1. __GFP_NORETRY set                          -> 0
 *   2. __GFP_NOFAIL set                           -> 1
 *   3. no reclaim progress and storage suspended  -> 0
 *   4. order <= PAGE_ALLOC_COSTLY_ORDER           -> 1
 *   5. __GFP_REPEAT set and fewer than (1 << order) pages reclaimed -> 1
 *   6. anything else                              -> 0
 */
static inline int
should_alloc_retry(gfp_t gfp_mask, unsigned int order,
                unsigned long did_some_progress,
                unsigned long pages_reclaimed)
{
    const int costly = order > PAGE_ALLOC_COSTLY_ORDER;

    /* Explicit caller wishes come first: never loop, or always loop. */
    if (gfp_mask & __GFP_NORETRY)
        return 0;
    if (gfp_mask & __GFP_NOFAIL)
        return 1;

    /*
     * Suspend converts GFP_KERNEL to __GFP_WAIT, which can keep reclaim
     * from making forward progress without invoking OOM, and it disables
     * storage devices so kswapd will not help either.  Bail out when
     * nothing was reclaimed and we are suspending.
     */
    if (!did_some_progress && pm_suspended_storage())
        return 0;

    /*
     * In this implementation a non-costly order behaves like
     * __GFP_NOFAIL; other implementations may differ.
     */
    if (!costly)
        return 1;

    /*
     * Costly order: only __GFP_REPEAT keeps us going, and only until an
     * order's worth of pages has been reclaimed.  If the allocation
     * still fails after that, retrying stops.
     */
    return (gfp_mask & __GFP_REPEAT) && pages_reclaimed < (1 << order);
}

可以看出,如果是 order-3 及以内(并且没有指定 __GFP_NORETRY,也不是 suspend 期间回收毫无进展的情况),就会一直 retry,直到分配成功。

我们这里是 order-4,已经算 high order 了。为了解决分配不到的问题,先试了 __GFP_REPEAT,还是不行,看来内存碎片很严重。要想一直回收并重试,就只能用 __GFP_NOFAIL 了;试了多次,没有出现明显阻塞,功能也正常,不过并不推荐这样用。