CMA (Contiguous Memory Allocator) is the Linux kernel's allocator for physically contiguous memory. I recently ran into a camera malfunction caused by a CMA allocation failure; the log:

<6>[ 3371.081458] alloc_contig_range: [8fa00, 8ff00) PFNs busy
<6>[ 3371.081481] alloc_contig_range: [8fa00, 90000) PFNs busy
<6>[ 3371.191527] alloc_contig_range: [8f800, 8fd00) PFNs busy
<6>[ 3371.191568] alloc_contig_range: [8f900, 8fe00) PFNs busy
<6>[ 3371.191619] cma: cma_alloc: alloc failed, req-size: 1280 pages, ret: -16
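Decoding the numbers first (a quick userspace sketch of my own, not kernel code, assuming 4 KiB pages): ret -16 is -EBUSY, and each busy window, e.g. [8fa00, 8ff00), is 0x500 = 1280 PFNs wide, exactly the size of the failing request. In other words, every 5 MiB window the allocator tried contained busy pages (PFN 0x8fa00 corresponds to physical address 0x8fa00000).

#include <stdio.h>

int main(void)
{
	unsigned long start = 0x8fa00, end = 0x8ff00; /* one busy window from the log */
	unsigned long req = 1280;                     /* req-size from cma_alloc */

	/* With 4 KiB pages, pages >> 8 gives MiB; both come out to 5 MiB. */
	printf("window:  %lu pages = %lu MiB\n", end - start, (end - start) >> 8);
	printf("request: %lu pages = %lu MiB\n", req, req >> 8);
	return 0;
}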

The relevant code lives in mm/cma.c and mm/cma_debug.c; the platform is Qualcomm, kernel 4.9. First, the CMA struct:

struct cma {
	unsigned long base_pfn;
	unsigned long count;
	unsigned long *bitmap;
	unsigned int order_per_bit; /* Order of pages represented by one bit */
	struct mutex lock;
#ifdef CONFIG_CMA_DEBUGFS
	struct hlist_head mem_head;
	spinlock_t mem_head_lock;
#endif
	const char *name;
};

base_pfn is the base page frame number of the CMA area; count is the area size in pages; the area is tracked by a bitmap, where each bit represents 2^order_per_bit pages; lock serializes access to the bitmap.
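To make the fields concrete, here is a userspace sketch of the bit-to-pfn mapping (the base_pfn of 0x8f800 is only an assumption to match the log, and bitmap_no is made up): bit i covers the 2^order_per_bit pages starting at base_pfn + (i << order_per_bit), which is exactly the pfn computation cma_alloc() does further below.

#include <stdio.h>

int main(void)
{
	unsigned long base_pfn = 0x8f800; /* assumed: area base, as hinted by the log */
	unsigned int order_per_bit = 0;   /* one page per bit */
	unsigned long bitmap_no = 0x200;  /* a made-up free bit index */

	/* same math as cma_alloc(): pfn = base_pfn + (bitmap_no << order_per_bit) */
	unsigned long pfn = base_pfn + (bitmap_no << order_per_bit);
	printf("bit %#lx -> pfn %#lx\n", bitmap_no, pfn); /* 0x200 -> 0x8fa00 */
	return 0;
}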

There are also two globals:

extern struct cma cma_areas[MAX_CMA_AREAS];
extern unsigned cma_area_count;

Easy to follow: there are cma_area_count areas, stored in the cma_areas[] array, capped at MAX_CMA_AREAS, i.e. 8 with the default CONFIG_CMA_AREAS=7:

/*
 * There is always at least global CMA area and a few optional
 * areas configured in kernel .config.
 */
#ifdef CONFIG_CMA_AREAS
#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS)

#else
#define MAX_CMA_AREAS (0)

#endif

with CONFIG_CMA_AREAS defined in mm/Kconfig:

config CMA_AREAS
	int "Maximum count of the CMA areas"
	depends on CMA
	default 7
	help
	  CMA allows to create CMA areas for particular purpose, mainly,
	  used as device private area. This parameter sets the maximum
	  number of CMA area in the system.

	  If unsure, leave the default value "7".

Now initialization:

/**
 * cma_init_reserved_mem() - create custom contiguous area from reserved memory
 * @base: Base address of the reserved area
 * @size: Size of the reserved area (in bytes),
 * @order_per_bit: Order of pages represented by one bit on bitmap.
 * @name: The name of the area. If this parameter is NULL, the name of
 *        the area will be set to "cmaN", where N is a running counter of
 *        used areas.
 * @res_cma: Pointer to store the created cma region.
 *
 * This function creates custom contiguous area from already reserved memory.
 */
int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
				 unsigned int order_per_bit,
				 const char *name,
				 struct cma **res_cma)
{
	...
	/*
	 * Each reserved area must be initialised later, when more kernel
	 * subsystems (like slab allocator) are available.
	 */
	cma = &cma_areas[cma_area_count];
	if (name) {
		cma->name = name;
	} else {
		cma->name = kasprintf(GFP_KERNEL, "cma%d\n", cma_area_count);
		if (!cma->name)
			return -ENOMEM;
	}
	cma->base_pfn = PFN_DOWN(base);
	cma->count = size >> PAGE_SHIFT; //tj: cma area size in pages
	cma->order_per_bit = order_per_bit;
	*res_cma = cma;
	cma_area_count++;
	totalcma_pages += (size / PAGE_SIZE);

	return 0;
}

It has two callers, early_init_fdt_scan_reserved_mem() and dma_contiguous_reserve(), both reached from setup_arch(). Here is the arm64 side:

void __init arm64_memblock_init(void)
{
	...
	/*
	 * Register the kernel text, kernel data, initrd, and initial
	 * pagetables with memblock.
	 */
	memblock_reserve(__pa_symbol(_text), _end - _text);
#ifdef CONFIG_BLK_DEV_INITRD
	if (initrd_start) {
		memblock_reserve(initrd_start, initrd_end - initrd_start);

		/* the generic initrd code expects virtual addresses */
		initrd_start = __phys_to_virt(initrd_start);
		initrd_end = __phys_to_virt(initrd_end);
	}
#endif

	early_init_fdt_scan_reserved_mem(); //tj: caller

	/* 4GB maximum for 32-bit only capable devices */
	if (IS_ENABLED(CONFIG_ZONE_DMA))
		arm64_dma_phys_limit = max_zone_dma_phys();
	else
		arm64_dma_phys_limit = PHYS_MASK + 1;
	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
	dma_contiguous_reserve(arm64_dma_phys_limit); //tj: caller

The device-tree reserved-memory scan eventually calls rmem_cma_setup():

static int __init rmem_cma_setup(struct reserved_mem *rmem)
{
	phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
	phys_addr_t mask = align - 1;
	unsigned long node = rmem->fdt_node;
	struct cma *cma;
	int err;

	if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
	    of_get_flat_dt_prop(node, "no-map", NULL))
		return -EINVAL;

	if ((rmem->base & mask) || (rmem->size & mask)) {
		pr_err("Reserved memory: incorrect alignment of CMA region\n");
		return -EINVAL;
	}

	err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma); //tj: here
	if (err) {
		pr_err("Reserved memory: unable to setup CMA region\n");
		return err;
	}
	/* Architecture specific contiguous memory fixup. */
	dma_contiguous_early_fixup(rmem->base, rmem->size);

	if (of_get_flat_dt_prop(node, "linux,cma-default", NULL))
		dma_contiguous_set_default(cma);

	rmem->ops = &rmem_cma_ops;
	rmem->priv = cma;

	pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n",
		&rmem->base, (unsigned long)rmem->size / SZ_1M);

	return 0;
}
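As a worked example of the alignment check above (a userspace sketch of mine; the 4 KiB page size, MAX_ORDER = 11 and pageblock_order = 10 are assumed typical arm64 values, not read from this device): the required alignment of base and size comes out to 4 MiB.

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;           /* assumed PAGE_SIZE */
	int max_order = 11, pageblock_order = 10; /* assumed arm64 defaults */
	int ord = max_order - 1 > pageblock_order ? max_order - 1 : pageblock_order;
	unsigned long align = page_size << ord;   /* 4 KiB << 10 = 4 MiB */

	/* rmem_cma_setup() rejects a region whose base or size has any of
	 * the mask bits set, i.e. anything not 4 MiB aligned here. */
	printf("align = %lu MiB, mask = 0x%lx\n", align >> 20, align - 1);
	return 0;
}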

The other caller comes from the DMA layer:

/**
 * dma_contiguous_reserve() - reserve area(s) for contiguous memory handling
 * @limit: End address of the reserved memory (optional, 0 for any).
 *
 * This function reserves memory from early allocator. It should be
 * called by arch specific code once the early allocator (memblock or bootmem)
 * has been activated and all other subsystems have already allocated/reserved
 * memory.
 */
void __init dma_contiguous_reserve(phys_addr_t limit)
{
	phys_addr_t selected_size = 0;
	phys_addr_t selected_base = 0;
	phys_addr_t selected_limit = limit;
	bool fixed = false;

	pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);

	if (size_cmdline != -1) {
		selected_size = size_cmdline;
		selected_base = base_cmdline;
		selected_limit = min_not_zero(limit_cmdline, limit);
		if (base_cmdline + size_cmdline == limit_cmdline)
			fixed = true;
	} else {
#ifdef CONFIG_CMA_SIZE_SEL_MBYTES
		selected_size = size_bytes;
#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE)
		selected_size = cma_early_percent_memory();
#elif defined(CONFIG_CMA_SIZE_SEL_MIN)
		selected_size = min(size_bytes, cma_early_percent_memory());
#elif defined(CONFIG_CMA_SIZE_SEL_MAX)
		selected_size = max(size_bytes, cma_early_percent_memory());
#endif
	}

	if (selected_size && !dma_contiguous_default_area) {
		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
			 (unsigned long)selected_size / SZ_1M);

		dma_contiguous_reserve_area(selected_size, selected_base,
					    selected_limit,
					    &dma_contiguous_default_area,
					    fixed);
	}
}
size_cmdline is set by the cma= kernel parameter (cma=nn[MG]@[start[MG][-end[MG]]]); the compile-time default comes from CONFIG_CMA_SIZE_MBYTES:

#ifdef CONFIG_CMA_SIZE_MBYTES
#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
#else
#define CMA_SIZE_MBYTES 0
#endif

struct cma *dma_contiguous_default_area;

/*
 * Default global CMA area size can be defined in kernel's .config.
 * This is useful mainly for distro maintainers to create a kernel
 * that works correctly for most supported systems.
 * The size can be set in bytes or as a percentage of the total memory
 * in the system.
 *
 * Users, who want to set the size of global CMA area for their system
 * should use cma= kernel parameter.
 */
static const phys_addr_t size_bytes = (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M;
static phys_addr_t size_cmdline = -1;

The default global CMA size is 16 MiB:

if DMA_CMA
comment "Default contiguous memory area size:"

config CMA_SIZE_MBYTES
	int "Size in Mega Bytes"
	depends on !CMA_SIZE_SEL_PERCENTAGE
	default 0 if X86
	default 16
	help
	  Defines the size (in MiB) of the default memory area for Contiguous
	  Memory Allocator. If the size of 0 is selected, CMA is disabled by
	  default, but it can be enabled by passing cma=size[MG] to the kernel.

Continuing down the chain:

int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
				       phys_addr_t limit, struct cma **res_cma,
				       bool fixed)
{
	int ret;

	ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed,
				     "reserved", res_cma);
	...

int __init cma_declare_contiguous(phys_addr_t base,
			phys_addr_t size, phys_addr_t limit,
			phys_addr_t alignment, unsigned int order_per_bit,
			bool fixed, const char *name, struct cma **res_cma)
{
	...
	/* Reserve memory */
	if (fixed) {
		if (memblock_is_region_reserved(base, size) ||
		    memblock_reserve(base, size) < 0) {
			ret = -EBUSY;
			goto err;
		}
	} else {
		phys_addr_t addr = 0;

		/*
		 * All pages in the reserved area must come from the same zone.
		 * If the requested region crosses the low/high memory boundary,
		 * try allocating from high memory first and fall back to low
		 * memory in case of failure.
		 */
		if (base < highmem_start && limit > highmem_start) {
			addr = memblock_alloc_range(size, alignment,
						    highmem_start, limit,
						    MEMBLOCK_NONE);
			limit = highmem_start;
		}

		if (!addr) {
			addr = memblock_alloc_range(size, alignment, base,
						    limit,
						    MEMBLOCK_NONE);
			if (!addr) {
				ret = -ENOMEM;
				goto err;
			}
		}

		/*
		 * kmemleak scans/reads tracked objects for pointers to other
		 * objects but this address isn't mapped and accessible
		 */
		kmemleak_ignore_phys(addr);
		base = addr;
	}

	ret = cma_init_reserved_mem(base, size, order_per_bit, name, res_cma);
	if (ret)
		goto err;

	pr_info("Reserved %ld MiB at %pa\n", (unsigned long)size / SZ_1M,
		&base);
	return 0;
So the reservation itself is done through memblock.

There is one more initialization step, cma_activate_area(), reached via do_one_initcall(): mm/cma.c registers cma_init_reserved_areas() as a core_initcall, which walks cma_areas[] and activates each area:

static int __init cma_activate_area(struct cma *cma)
{
	int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long);
	unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
	unsigned i = cma->count >> pageblock_order;
	struct zone *zone;

	cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL);

	if (!cma->bitmap)
		return -ENOMEM;

where cma_bitmap_maxno() is:

static inline unsigned long cma_bitmap_maxno(struct cma *cma)
{
	return cma->count >> cma->order_per_bit;
}

The bitmap is allocated first. The area size in pages is cma->count, in bitmap bits it is cma->count >> order_per_bit, and in pageblocks it is cma->count >> pageblock_order, right?
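A worked example of that arithmetic (userspace sketch; the 16 MiB size, 4 KiB pages, order_per_bit = 0 and pageblock_order = 10 are assumed values):

#include <stdio.h>

#define PAGE_SHIFT	12			/* assumed 4 KiB pages */
#define BITS_PER_LONG	(8 * sizeof(long))
#define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

int main(void)
{
	unsigned long count = (16UL << 20) >> PAGE_SHIFT; /* 16 MiB -> 4096 pages */
	unsigned int order_per_bit = 0;                   /* one bit per page */
	unsigned int pageblock_order = 10;                /* assumed, 4 MiB blocks */

	unsigned long maxno = count >> order_per_bit;     /* 4096 bits */
	unsigned long bitmap_bytes = BITS_TO_LONGS(maxno) * sizeof(long);
	unsigned long pageblocks = count >> pageblock_order;

	/* prints: pages=4096 bits=4096 bitmap=512 bytes, pageblocks=4 (64-bit) */
	printf("pages=%lu bits=%lu bitmap=%lu bytes, pageblocks=%lu\n",
	       count, maxno, bitmap_bytes, pageblocks);
	return 0;
}

So under these assumptions a 16 MiB area costs only 512 bytes of bitmap and is activated as four pageblocks.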

Next, initialization proceeds pageblock by pageblock:

	zone = page_zone(pfn_to_page(pfn));

	do {
		unsigned j;

		base_pfn = pfn;
		for (j = pageblock_nr_pages; j; --j, pfn++) {
			WARN_ON_ONCE(!pfn_valid(pfn));
			/*
			 * alloc_contig_range requires the pfn range
			 * specified to be in the same zone. Make this
			 * simple by forcing the entire CMA resv range
			 * to be in the same zone.
			 */
			if (page_zone(pfn_to_page(pfn)) != zone)
				goto not_in_zone;
		}
		init_cma_reserved_pageblock(pfn_to_page(base_pfn));
	} while (--i);

The key call is init_cma_reserved_pageblock():

#ifdef CONFIG_CMA
/* Free whole pageblock and set its migration type to MIGRATE_CMA. */
void __init init_cma_reserved_pageblock(struct page *page)
{
	unsigned i = pageblock_nr_pages;
	struct page *p = page;

	do {
		__ClearPageReserved(p);
		set_page_count(p, 0);
	} while (++p, --i);

	set_pageblock_migratetype(page, MIGRATE_CMA);

	if (pageblock_order >= MAX_ORDER) {
		i = pageblock_nr_pages;
		p = page;
		do {
			set_page_refcounted(p);
			__free_pages(p, MAX_ORDER - 1);
			p += MAX_ORDER_NR_PAGES;
		} while (i -= MAX_ORDER_NR_PAGES);
	} else {
		set_page_refcounted(page);
		__free_pages(page, pageblock_order);
	}

	adjust_managed_page_count(page, pageblock_nr_pages);
}
#endif

You can see that each pageblock's migratetype is set to MIGRATE_CMA and its pages are handed back via __free_pages(), i.e. the whole reserved area is released into the buddy allocator. This is also where the later "PFNs busy" trouble comes from: the buddy allocator may lend MIGRATE_CMA pages out to movable allocations, and cma_alloc() then has to migrate those users away; pages that cannot be isolated or migrated make the range busy. Buddy internals are a topic for another day.

Now the allocation interface:

/**
 * cma_alloc() - allocate pages from contiguous area
 * @cma: Contiguous memory region for which the allocation is performed.
 * @count: Requested number of pages.
 * @align: Requested alignment of pages (in PAGE_SIZE order).
 * @gfp_mask: GFP mask to use during compaction
 *
 * This function allocates part of contiguous memory on specific
 * contiguous memory area.
 */
struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
		       gfp_t gfp_mask)
{
	...
	for (;;) {
		mutex_lock(&cma->lock);
		bitmap_no = bitmap_find_next_zero_area_off(cma->bitmap,
				bitmap_maxno, start, bitmap_count, mask,
				offset);
		if (bitmap_no >= bitmap_maxno) {
			mutex_unlock(&cma->lock);
			break;
		}
		bitmap_set(cma->bitmap, bitmap_no, bitmap_count);
		/*
		 * It's safe to drop the lock here. We've marked this region for
		 * our exclusive use. If the migration fails we will take the
		 * lock again and unmark it.
		 */
		mutex_unlock(&cma->lock);

		pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
		mutex_lock(&cma_mutex);
		ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA,
					 gfp_mask);
		mutex_unlock(&cma_mutex);
		if (ret == 0) {
			page = pfn_to_page(pfn);
			break; //tj: allocation succeeded
		}

		cma_clear_bitmap(cma, pfn, count);
		if (ret != -EBUSY)
			break;

		pr_debug("%s(): memory range at %p is busy, retrying\n",
			 __func__, pfn_to_page(pfn));
		/* try again with a bit different memory target */
		start = bitmap_no + mask + 1;
	}
	...
	if (ret && !(gfp_mask & __GFP_NOWARN)) {
		pr_err("%s: alloc failed, req-size: %zu pages, ret: %d\n",
		       __func__, count, ret);
		cma_debug_show_areas(cma);
	}

Allocation is ultimately done by alloc_contig_range(); on success the corresponding bitmap bits stay set. The release interface cma_release() mainly calls free_contig_range() and then clears the bitmap. Both log lines we started with come from this function: each candidate window contained pages that could not be isolated or migrated ("PFNs busy"), and once the whole bitmap had been scanned, cma_alloc() gave up with the last -EBUSY.
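For context, this is roughly how a driver lands here. A minimal sketch, assuming a kernel 4.9-era struct device wired to a CMA region (example_alloc()/example_free() are hypothetical names): dma_alloc_coherent() typically routes large requests through dma_alloc_from_contiguous() and thus into cma_alloc().

#include <linux/dma-mapping.h>
#include <linux/sizes.h>

static void *cpu_addr;
static dma_addr_t dma_handle;

/* Sketch: a 5 MiB coherent buffer, the same size as the failing
 * 1280-page request in the log above. */
static int example_alloc(struct device *dev)
{
	cpu_addr = dma_alloc_coherent(dev, 5 * SZ_1M, &dma_handle, GFP_KERNEL);
	return cpu_addr ? 0 : -ENOMEM;
}

static void example_free(struct device *dev)
{
	dma_free_coherent(dev, 5 * SZ_1M, cpu_addr, dma_handle);
}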

When you hit a failure, you can enable cma_debug_show_areas() (compiled in under CONFIG_CMA_DEBUG) to see what is left; it prints the available pages, and you can also try enlarging the CMA size.

There is also a debugfs interface under /sys/kernel/debug/cma. For example, for the default 16 MiB global CMA area:

xxx:/sys/kernel/debug/cma/cma-reserved # ls
alloc base_pfn bitmap count free maxchunk order_per_bit used

The bitmap file here doesn't seem to be of much use!?
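For reference, these files are created by mm/cma_debug.c: base_pfn, count and order_per_bit mirror the struct fields, used is the number of allocated pages, and maxchunk is the largest free run. On trees that carry the writable alloc/free entries you can exercise cma_alloc() by hand with page counts, roughly like this (assumed from upstream cma_debug.c semantics, untested on this device):

xxx:/sys/kernel/debug/cma/cma-reserved # cat used maxchunk
xxx:/sys/kernel/debug/cma/cma-reserved # echo 64 > alloc
xxx:/sys/kernel/debug/cma/cma-reserved # echo 64 > free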