目录

  • What is high memory
  • kmap_atomic
  • kunmap_atomic

What is high memory

先看下内核文档kernel3.18/Documentation/vm/highmem.txt的说明:

High memory (highmem) is used when the size of physical memory approaches or
exceeds the maximum size of virtual memory. At that point it becomes
impossible for the kernel to keep all of the available physical memory mapped
at all times. This means the kernel needs to start using temporary mappings of
the pieces of physical memory that it wants to access.

The part of (physical) memory not covered by a permanent mapping is what we
refer to as 'highmem'. There are various architecture dependent constraints on
where exactly that border lies.

The traditional split for architectures using this approach is 3:1, 3GiB for
userspace and the top 1GiB for kernel space:

           +--------+ 0xffffffff
           | Kernel |
           +--------+ 0xc0000000
           |        |   
           | User   |   
           |        |
           +--------+ 0x00000000

This means that the kernel can at most map 1GiB of physical memory at any one
time, but because we need virtual address space for other things - including
temporary maps to access the rest of the physical memory - the actual direct
map will typically be less (usually around ~896MiB).

在32bit系统里,一般内核可以访问的虚拟地址空间大小是1G。即使物理内存也只有1G,也不可能全部永久映射,因为内核地址空间还要留一部分给其他用途(比如映射外设register、建立临时映射等)。所以没有被永久映射的那一块物理内存就叫高端内存,具体分界和架构有关。

如果物理内存大于1G,那最多永久映射1G,剩下的部分也就是高端内存了。permanent mapping的内存也叫low memory。通过temporary mapping(kmap_atomic for smp)访问高端内存。

64bits系统里内核空间很大,就没有high memory的概念了。

kmap_atomic分析

kmap已经不推荐用了,内核主要用kmap_atomic来创建temporary mapping。文档说明:

(*) kmap(). This permits a short duration mapping of a single page. It needs
 global synchronization, but is amortized somewhat.  It is also prone to
 deadlocks when using in a nested fashion, and so it is not recommended for 
 new code.

(*) kmap_atomic(). This permits a very short duration mapping of a single
 page.  Since the mapping is restricted to the CPU that issued it, it
 performs well, but the issuing task is therefore required to stay on that
 CPU until it has finished, lest some other task displace its mappings.

 kmap_atomic() may also be used by interrupt contexts, since it does not
 sleep and the caller may not sleep until after kunmap_atomic() is called.

It may be assumed that k[un]map_atomic() won't fail.

ARM high memory的代码主要分布在:

kernel3.18/mm/highmem.c
kernel3.18/include/linux/highmem.h
kernel3.18/arch/arm/mm/highmem.c
kernel3.18/arch/arm/include/asm/highmem.h

ARM里kmap_atomic的定义(arch/arm/mm/highmem.c):

void *kmap_atomic(struct page *page)
{
    ...
    pagefault_disable();
    if (!PageHighMem(page)) //如果页面不在high memory
        return page_address(page);
    ...

page_address有3处定义:

/*
 * HASHED_PAGE_VIRTUAL is selected when highmem is enabled and the
 * architecture did not request a per-page 'virtual' field via
 * WANT_PAGE_VIRTUAL.
 */
#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
#define HASHED_PAGE_VIRTUAL
#endif

/*
 * Variant 1 (WANT_PAGE_VIRTUAL): the kernel virtual address is read from
 * and written to page->virtual directly; no initialisation is needed.
 */
#if defined(WANT_PAGE_VIRTUAL)
static inline void *page_address(const struct page *page)
{
    return page->virtual;
}
static inline void set_page_address(struct page *page, void *address)
{
    page->virtual = address;
}
#define page_address_init()  do { } while(0)
#endif

/*
 * Variant 2 (HASHED_PAGE_VIRTUAL): only the prototypes are given here;
 * the definitions live out of line and are not part of this snippet.
 */
#if defined(HASHED_PAGE_VIRTUAL)
void *page_address(const struct page *page);
void set_page_address(struct page *page, void *virtual);
void page_address_init(void);
#endif

/*
 * Variant 3 (neither option set): page_address() is simply
 * lowmem_page_address(), and the setter/initialiser expand to no-ops.
 */
#if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
#define page_address(page) lowmem_page_address(page)
#define set_page_address(page, address)  do { } while(0)
#define page_address_init()  do { } while(0)
#endif

ARM没有定义WANT_PAGE_VIRTUAL,是给其他arch使用的。

     /*  
         * On machines where all RAM is mapped into kernel address space,
         * we can simply calculate the virtual address. On machines with
         * highmem some memory is mapped into kernel virtual memory
         * dynamically, so we need a place to store that address.
         * Note that this field could be 16 bits on x86 ... ;)
         *
         * Architectures with slow multiplication can define
         * WANT_PAGE_VIRTUAL in asm/page.h
         */
#if defined(WANT_PAGE_VIRTUAL)
        void *virtual;                  /* Kernel virtual address (NULL if
                                           not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */

ARM的page_address在mm/highmem.c里:

void *page_address(const struct page *page)
{
    unsigned long flags;
    void *ret;
    struct page_address_slot *pas;

    if (!PageHighMem(page))
        return lowmem_page_address(page);
    ...

对于low memory的page直接计算:

/*
 * Convert a page frame number to an address: cast to phys_addr_t first,
 * then shift left by PAGE_SHIFT.
 */
#define PFN_PHYS(x)     ((phys_addr_t)(x) << PAGE_SHIFT)

/*
 * Compute the kernel virtual address of a page without any lookup:
 * page -> pfn (page_to_pfn) -> address (PFN_PHYS) -> virtual (__va).
 * NOTE(review): presumably only valid for pages that are permanently
 * mapped (low memory); the callers shown check !PageHighMem() first.
 */
static __always_inline void *lowmem_page_address(const struct page *page)
{
    return __va(PFN_PHYS(page_to_pfn(page)));
}

page_to_pfn在memory_model.h里定义:

涉及到linux支持的三种内存模型:CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM,这里就看FLATMEM:

#if defined(CONFIG_FLATMEM)

#define __pfn_to_page(pfn)    (mem_map + ((pfn) - ARCH_PFN_OFFSET))
#define __page_to_pfn(page)    ((unsigned long)((page) - mem_map) + \
                 ARCH_PFN_OFFSET)
...
#endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */

#define page_to_pfn __page_to_pfn
#define pfn_to_page __pfn_to_page

看样子像是偏移计算,来看下mem_map和ARCH_PFN_OFFSET。

#ifndef CONFIG_NEED_MULTIPLE_NODES
/* use the per-pgdat data instead for discontigmem - mbligh */
unsigned long max_mapnr;
struct page *mem_map;

EXPORT_SYMBOL(max_mapnr);
EXPORT_SYMBOL(mem_map);
#endif

static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
{
        /* Skip empty nodes */
        if (!pgdat->node_spanned_pages)
                return;

#ifdef CONFIG_FLAT_NODE_MEM_MAP
        ...
#ifndef CONFIG_NEED_MULTIPLE_NODES
        /*   
         * With no DISCONTIG, the global mem_map is just set as node 0's
         */
        if (pgdat == NODE_DATA(0)) {
                mem_map = NODE_DATA(0)->node_mem_map;
        ...
}
NEED_MULTIPLE_NODES在Kconfig里的定义:

config NEED_MULTIPLE_NODES
        def_bool y
        depends on DISCONTIGMEM || NUMA

对于手机配置目前一般不是NUMA,也就是1个node。看下node_mem_map:

typedef struct pglist_data {
    ...
    int nr_zones;
#ifdef CONFIG_FLAT_NODE_MEM_MAP    /* means !SPARSEMEM */
    struct page *node_mem_map;
#ifdef CONFIG_MEMCG
    struct page_cgroup *node_page_cgroup;
#endif
    ...

分配处理

typedef struct pglist_data {
    ...
    unsigned long node_spanned_pages; /* total size of physical page
                         range, including holes */
    ...
}pg_data_t;

/*
 * Return the node's end pfn: its start pfn plus the number of pages it
 * spans. The result is therefore one past the last pfn of the node.
 */
static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
{
    return pgdat->node_start_pfn + pgdat->node_spanned_pages;
}

static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
{
    /* Skip empty nodes */
    if (!pgdat->node_spanned_pages)
        return;

#ifdef CONFIG_FLAT_NODE_MEM_MAP
    /* ia64 gets its own node_mem_map, before this, without bootmem */
    if (!pgdat->node_mem_map) {
        unsigned long size, start, end;
        struct page *map;

        /*
         * The zone's endpoints aren't required to be MAX_ORDER
         * aligned but the node_mem_map endpoints must be in order
         * for the buddy allocator to function correctly.
         */
        start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
        end = pgdat_end_pfn(pgdat);
        end = ALIGN(end, MAX_ORDER_NR_PAGES);
        size =  (end - start) * sizeof(struct page);
        map = alloc_remap(pgdat->node_id, size);
        if (!map)
            map = memblock_virt_alloc_node_nopanic(size,
                                   pgdat->node_id);
        pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
    }

start, end, size都有了,具体分配调用了alloc_remap:

/*
 * alloc_remap(): architectures that define CONFIG_HAVE_ARCH_ALLOC_REMAP
 * provide their own implementation; only the prototype is given here.
 */
#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
extern void *alloc_remap(int nid, unsigned long size);
#else
/*
 * Fallback stub: always returns NULL and ignores both arguments, so a
 * caller that checks the result must allocate by other means.
 */
static inline void *alloc_remap(int nid, unsigned long size)
{
        return NULL;
}
#endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */

ARM没有定义CONFIG_HAVE_ARCH_ALLOC_REMAP,走memblock_virt_alloc_node_nopanic,分配涉及到bootmem,暂略过。

可以看出,pgdat->node_mem_map(即上面分配的map)这块空间存放着所有物理页对应的struct page,mem_map指向它的开头。也就是如下图:

mem_map
  |
  +-------+-------+-----+-------+
  | page1 | page2 | ... | pageN |
  +-------+-------+-----+-------+

ok,再回头看page_to_pfn:要找到page在mem_map数组里的index,直接page - mem_map就可以了,再加上pfn偏移ARCH_PFN_OFFSET就得到pfn:

/*
 * PAGE_OFFSET is CONFIG_KERNEL_RAM_BASE_ADDRESS when that option is
 * configured, otherwise 0.
 */
#ifdef CONFIG_KERNEL_RAM_BASE_ADDRESS
#define PAGE_OFFSET             (CONFIG_KERNEL_RAM_BASE_ADDRESS)
#else
#define PAGE_OFFSET             (0)
#endif

/*
 * Default pfn offset, used only if ARCH_PFN_OFFSET was not defined
 * earlier: PAGE_OFFSET expressed in pages.
 * NOTE(review): this looks like a generic fallback; an architecture may
 * define its own ARCH_PFN_OFFSET beforehand - confirm which one ARM uses.
 */
#ifndef ARCH_PFN_OFFSET
#define ARCH_PFN_OFFSET         (PAGE_OFFSET >> PAGE_SHIFT)
#endif

再通过PFN_PHYS把pfn转换为地址,这里得到的是物理地址。因为low memory是线性映射的(物理地址和内核虚拟地址一一对应,只相差一个固定偏移),所以直接用__va就能转化为虚拟地址。

PAGE_SHIFT用来决定page size:

#define PAGE_SHIFT      12
#ifdef __ASSEMBLY__
#define PAGE_SIZE       (1 << PAGE_SHIFT)  //4KB
#else
#define PAGE_SIZE       (1UL << PAGE_SHIFT)