Android P lmkd新增了许多机制和属性,包括引入原来内核的minfree算法等,看样子是越来越复杂了,下面来看看,内核版本4.9,高通平台。

how to kill

先看下应该杀掉哪个process or 哪些processes(yes, 已经支持)?

主要改动是:

  • support kill heaviest process
  • support kill multiple processes
/*
 * Find processes to kill to free required number of pages.
 * If pages_to_free is set to 0 only one process will be killed.
 * Returns the size of the killed processes.
 */
static int find_and_kill_processes(enum vmpressure_level level,
                                   int min_score_adj, int pages_to_free) {
    int i;
    int killed_size;
    int pages_freed = 0;

#ifdef LMKD_LOG_STATS
    bool lmk_state_change_start = false;
#endif

    for (i = OOM_SCORE_ADJ_MAX; i >= min_score_adj; i--) {
        struct proc *procp;

        while (true) {
            procp = kill_heaviest_task ?
                proc_get_heaviest(i) : proc_adj_lru(i);

            if (!procp)
                break;
    kill_heaviest_task =
        property_get_bool("ro.lmk.kill_heaviest_task", false);

可见还是从大adj开始,如果我们要kill heaviest task, 那就选个heaviest的杀掉吧,否则按proc_adj_lru来选择。默认heaviest task是关闭的,到底哪一种效果好?靠你了:]

heaviest是什么意思?看样子就是占用大内存的,看code:

/*
 * Walk the slot list that belongs to the given oom adj and return the entry
 * for which proc_get_size() reports the largest value. Entries whose reported
 * size is <= 0 are handed to pid_remove() during the walk. Returns NULL when
 * no entry reports a positive size.
 */
static struct proc *proc_get_heaviest(int oomadj) {
    struct adjslot_list *list_head = &procadjslot_list[ADJTOSLOT(oomadj)];
    struct adjslot_list *node;
    struct adjslot_list *following;
    struct proc *heaviest = NULL;
    int heaviest_size = 0;

    for (node = list_head->next; node != list_head; node = following) {
        struct proc *candidate = (struct proc *)node;
        int pid = candidate->pid;
        int size = proc_get_size(pid);

        /*
         * Fetch the successor before any pid_remove() call; presumably the
         * removal unlinks this node, so it must not be touched afterwards.
         */
        following = node->next;

        if (size <= 0) {
            pid_remove(pid);
            continue;
        }

        /* Strictly greater: the first entry seen wins among equal sizes. */
        if (size > heaviest_size) {
            heaviest_size = size;
            heaviest = candidate;
        }
    }

    return heaviest;
}

在这个adj的list里选个size(rss)最大的,size从/proc/pid/statm里取,也就是adj越大&&占用内存越大越容易被杀,和原来内核的策略一样一样的。

proc_adj_lru就是在这个adj的list里选了个tail(最不活跃?)的杀掉。

static struct proc *proc_adj_lru(int oomadj) {
    return (struct proc *)adjslot_tail(&procadjslot_list[ADJTOSLOT(oomadj)]);
}

继续看:

            killed_size = kill_one_process(procp, min_score_adj, level);
            if (killed_size >= 0) {
#ifdef LMKD_LOG_STATS
                if (enable_stats_log && !lmk_state_change_start) {
                    lmk_state_change_start = true;
                    stats_write_lmk_state_changed(log_ctx, LMK_STATE_CHANGED,
                                                  LMK_STATE_CHANGE_START);
                }
#endif

                pages_freed += killed_size;
                if (pages_freed >= pages_to_free) {

#ifdef LMKD_LOG_STATS
                    if (enable_stats_log && lmk_state_change_start) {
                        stats_write_lmk_state_changed(log_ctx, LMK_STATE_CHANGED,
                                LMK_STATE_CHANGE_STOP);
                    }
#endif
                    return pages_freed;
                }
            }

kill_one_process杀完后会check pages_freed是不是达标到pages_to_free,如果是就ok了,否则继续。

ok, 那什么时候去杀?如何探测内存压力? 具体就是lmkd接收到memory pressure事件后在这个event的handler (mp_event_common)里去处理。

when to kill

相比8.0,9.0中mp_event_common最大的变化是:

  • 增加minfree algorithm
  • 区分low ram device and high performance device
  • 高通加入adaptive逻辑(参考msm kernel adaptive lmk),一个加强小特性

一上来会根据meminfo和zoneinfo记录相关内存信息。

    if (meminfo_parse(&mi) < 0 || zoneinfo_parse(&zi) < 0) {
        ALOGE("Failed to get free memory!");
        return;
    }

如果开启了use_minfree_levels(默认是关闭的):

    use_minfree_levels =
        property_get_bool("ro.lmk.use_minfree_levels", false);
    if (use_minfree_levels) {
        int i;

        other_free = mi.field.nr_free_pages - zi.field.totalreserve_pages;
        if (mi.field.nr_file_pages > (mi.field.shmem + mi.field.unevictable + mi.field.swap_cached)) {
            other_file = (mi.field.nr_file_pages - mi.field.shmem -
                          mi.field.unevictable - mi.field.swap_cached);
        } else {
            other_file = 0;
        }

        min_score_adj = OOM_SCORE_ADJ_MAX + 1;
        for (i = 0; i < lowmem_targets_size; i++) {
            minfree = lowmem_minfree[i];
            if (other_free < minfree && other_file < minfree) {
                min_score_adj = lowmem_adj[i];
                // Adaptive LMK
                if (enable_adaptive_lmk && level == VMPRESS_LEVEL_CRITICAL &&
                        i > lowmem_targets_size-4) {
                    min_score_adj = lowmem_adj[i-1];
                }
                break;
            }
        }

        if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
            if (debug_process_killing) {
                ALOGI("Ignore %s memory pressure event "
                      "(free memory=%ldkB, cache=%ldkB, limit=%ldkB)",
                      level_name[level], other_free * page_k, other_file * page_k,
                      (long)lowmem_minfree[lowmem_targets_size - 1] * page_k);
            }
            return;
        }

        if (enhance_batch_kill) {
            // Kill one process at a time.
            pages_to_free = 0;
        } else {
            /* Original minfree logic */
            /* Free up enough pages to push over the highest minfree level */
            pages_to_free = lowmem_minfree[lowmem_targets_size - 1] -
                ((other_free < other_file) ? other_free : other_file);
        }
        goto do_kill;
    }

和内核逻辑一样,查看当前剩余内存是否在minfree分段范围内,如果在,就把这个minfree对应的adj赋给min_score_adj。那要释放多少内存呢?

高通默认关闭了多个任务的逻辑,只允许杀一个进程:

       if (enhance_batch_kill) {
            // Kill one process at a time.
            pages_to_free = 0;
        } else {
            /* Original minfree logic */
            /* Free up enough pages to push over the highest minfree level */
            pages_to_free = lowmem_minfree[lowmem_targets_size - 1] -
                ((other_free < other_file) ? other_free : other_file);
        }

如果要杀多个进程,就是把那个距离minfree的差值要释放出来。

另外,高通加的这段:

                // Adaptive LMK
                if (enable_adaptive_lmk && level == VMPRESS_LEVEL_CRITICAL &&
                        i > lowmem_targets_size-4) {
                    min_score_adj = lowmem_adj[i-1];
                }

就是在critical内存特紧张时,可以杀更低一级的adj的task。

ok,下来看下计算swapping是否频繁:

    if ((mem_usage = get_memory_usage(&mem_usage_file_data)) < 0) {
        goto do_kill;
    }
    if ((memsw_usage = get_memory_usage(&memsw_usage_file_data)) < 0) {
        goto do_kill;
    }

    // Calculate percent for swappinness.
    mem_pressure = (mem_usage * 100) / memsw_usage;

如果读取mem usage或memsw usage失败(get_memory_usage返回值小于0),就直接去do_kill;swapping情况从memcg处获得。

下来根据swapping情况判断是否要upgrade or downgrade level,默认关闭了, go上使能。

    if (enable_pressure_upgrade && level != VMPRESS_LEVEL_CRITICAL) {
        // We are swapping too much.
        if (mem_pressure < upgrade_pressure) {
            level = upgrade_level(level);
            if (debug_process_killing) {
                ALOGI("Event upgraded to %s", level_name[level]);
            }
        }
    }

    // If the pressure is larger than downgrade_pressure lmk will not
    // kill any process, since enough memory is available.
    if (mem_pressure > downgrade_pressure) {
        if (debug_process_killing) {
            ALOGI("Ignore %s memory pressure", level_name[level]);
        }
        return;
    } else if (level == VMPRESS_LEVEL_CRITICAL &&
               mem_pressure > upgrade_pressure) {
        if (debug_process_killing) {
            ALOGI("Downgrade critical memory pressure");
        }
        // Downgrade event, since enough memory available.
        level = downgrade_level(level);
    }

以前的文章已经分析过。

下来看do_kill:

do_kill:
    if (low_ram_device) {
        /* For Go devices kill only one task */
        if (find_and_kill_processes(level, level_oomadj[level], 0) == 0) {
            if (debug_process_killing) {
                ALOGI("Nothing to kill");
            }
        }
    } else {

可见,对low ram设备,直接找个杀掉完了。对high ram,如果用了minfree直接杀,没用minfree来看下:

        if (!use_minfree_levels) {
            /* If pressure level is less than critical and enough free swap then ignore */
            if (level < VMPRESS_LEVEL_CRITICAL &&
                mi.field.free_swap > low_pressure_mem.max_nr_free_pages) {
                if (debug_process_killing) {
                    ALOGI("Ignoring pressure since %" PRId64
                          " swap pages are available ",
                          mi.field.free_swap);
                }
                return;
            }
            /* Free up enough memory to downgrate the memory pressure to low level */
            if (mi.field.nr_free_pages < low_pressure_mem.max_nr_free_pages) {
                pages_to_free = low_pressure_mem.max_nr_free_pages -
                    mi.field.nr_free_pages;
            } else {
                if (debug_process_killing) {
                    ALOGI("Ignoring pressure since more memory is "
                        "available (%" PRId64 ") than watermark (%" PRId64 ")",
                        mi.field.nr_free_pages, low_pressure_mem.max_nr_free_pages);
                }
                return;
            }
            min_score_adj = level_oomadj[level];
        }

if event level is low or medium, 那么看下如果有足够的free swap,就不杀了,如果没有就要释放enough memory到low pressure memory的free水平。rt?

关于procadjslot_list的insert是在data_sock的handler里处理,如下:

/* Max supported number of data connections; also the length of data_sock[]. */
#define MAX_DATA_CONN 2

/*
 * Socket event handler data: one record for the control socket and one
 * record per data connection.
 */
static struct sock_event_handler_info ctrl_sock;
static struct sock_event_handler_info data_sock[MAX_DATA_CONN];

/*
 * Event budget: 1 ctrl listen socket + MAX_DATA_CONN (2) ctrl data sockets +
 * VMPRESS_LEVEL_COUNT memory pressure levels (3 according to the original
 * note; the enum itself is not visible here).
 */
#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT)
/* NOTE(review): presumably the epoll instance descriptor - confirm at the init site. */
static int epollfd;
/* NOTE(review): presumably the number of events currently registered/waited on - confirm. */
static int maxevents;

说的就是socket event handler,这两个连接一个是原来的for ActivityManager,一个是for lmk test process, mark to check later。