Android P lmkd新增了许多机制和属性,包括引入原来内核的minfree算法等,看样子是越来越复杂了,下面来看看,内核版本4.9,高通平台。

how to kill

先看下应该杀掉哪个process or 哪些processes(yes, 已经支持)?

主要改动是:

  • support kill heaviest process
  • support kill multiple processes
/*
* Find processes to kill to free required number of pages.
* If pages_to_free is set to 0 only one process will be killed.
* Returns the size of the killed processes.
*/
static int find_and_kill_processes(enum vmpressure_level level,
int min_score_adj, int pages_to_free) {
int i;
int killed_size;
int pages_freed = 0;

#ifdef LMKD_LOG_STATS
bool lmk_state_change_start = false;
#endif

for (i = OOM_SCORE_ADJ_MAX; i >= min_score_adj; i--) {
struct proc *procp;

while (true) {
procp = kill_heaviest_task ?
proc_get_heaviest(i) : proc_adj_lru(i);

if (!procp)
break;
kill_heaviest_task =
property_get_bool("ro.lmk.kill_heaviest_task", false);

可见还是从大adj开始,如果我们要kill heaviest task, 那就选个heaviest的杀掉吧,否则按proc_adj_lru来选择。默认heaviest task是关闭的,到底哪一种效果好?靠你了:]

heaviest是什么意思?看样子就是占用大内存的,看code:

/*
 * Walk the process list of the slot selected by ADJTOSLOT(oomadj) and return
 * the entry for which proc_get_size() reports the largest positive value.
 * Entries whose reported size is <= 0 are passed to pid_remove() during the
 * walk. Returns NULL when no entry with a positive size is found.
 */
static struct proc *proc_get_heaviest(int oomadj) {
    struct adjslot_list *const list_head = &procadjslot_list[ADJTOSLOT(oomadj)];
    struct proc *heaviest = NULL;
    int heaviest_size = 0;

    for (struct adjslot_list *node = list_head->next; node != list_head; ) {
        struct proc *candidate = (struct proc *)node;
        int candidate_pid = candidate->pid;
        int candidate_size = proc_get_size(candidate_pid);
        /*
         * Read the successor before pid_remove() can run on this entry;
         * presumably pid_remove() unlinks the node - confirm against its
         * definition.
         */
        struct adjslot_list *successor = node->next;

        if (candidate_size <= 0) {
            pid_remove(candidate_pid);
        } else if (candidate_size > heaviest_size) {
            /* Strictly greater: the first entry seen wins ties. */
            heaviest_size = candidate_size;
            heaviest = candidate;
        }
        node = successor;
    }
    return heaviest;
}

在这个adj的list里选个size(rss)最大的,size从/proc/pid/statm里取,也就是adj越大&&占用内存越大越容易被杀,和原来内核的策略一样一样的。

proc_adj_lru就是在这个adj的list里选了个tail(最不活跃?)的杀掉。

static struct proc *proc_adj_lru(int oomadj) {
return (struct proc *)adjslot_tail(&procadjslot_list[ADJTOSLOT(oomadj)]);
}

继续看:

            killed_size = kill_one_process(procp, min_score_adj, level);
if (killed_size >= 0) {
#ifdef LMKD_LOG_STATS
if (enable_stats_log && !lmk_state_change_start) {
lmk_state_change_start = true;
stats_write_lmk_state_changed(log_ctx, LMK_STATE_CHANGED,
LMK_STATE_CHANGE_START);
}
#endif

pages_freed += killed_size;
if (pages_freed >= pages_to_free) {

#ifdef LMKD_LOG_STATS
if (enable_stats_log && lmk_state_change_start) {
stats_write_lmk_state_changed(log_ctx, LMK_STATE_CHANGED,
LMK_STATE_CHANGE_STOP);
}
#endif
return pages_freed;
}
}

kill_one_process杀完后会检查累计的pages_freed是否已经达到pages_to_free,如果达到就返回,否则继续杀下一个进程。

ok, 那什么时候去杀?如何探测内存压力? 具体就是lmkd接收到memory pressure事件后在这个event的handler (mp_event_common)里去处理。

when to kill

mp_event_common在9.0上相比8.0最大的变化就是:

  • 增加minfree algorithm
  • 区分low ram device and high performance device
  • 高通加入adaptive逻辑(参考msm kernel adaptive lmk),一个加强小特性

一上来会根据meminfo和zoneinfo记录相关内存信息。

if (meminfo_parse(&mi) < 0 || zoneinfo_parse(&zi) < 0) {
ALOGE("Failed to get free memory!");
return;
}

如果开启了use_minfree_levels(默认是关闭的):

use_minfree_levels =
property_get_bool("ro.lmk.use_minfree_levels", false);
if (use_minfree_levels) {
int i;

other_free = mi.field.nr_free_pages - zi.field.totalreserve_pages;
if (mi.field.nr_file_pages > (mi.field.shmem + mi.field.unevictable + mi.field.swap_cached)) {
other_file = (mi.field.nr_file_pages - mi.field.shmem -
mi.field.unevictable - mi.field.swap_cached);
} else {
other_file = 0;
}

min_score_adj = OOM_SCORE_ADJ_MAX + 1;
for (i = 0; i < lowmem_targets_size; i++) {
minfree = lowmem_minfree[i];
if (other_free < minfree && other_file < minfree) {
min_score_adj = lowmem_adj[i];
// Adaptive LMK
if (enable_adaptive_lmk && level == VMPRESS_LEVEL_CRITICAL &&
i > lowmem_targets_size-4) {
min_score_adj = lowmem_adj[i-1];
}
break;
}
}

if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
if (debug_process_killing) {
ALOGI("Ignore %s memory pressure event "
"(free memory=%ldkB, cache=%ldkB, limit=%ldkB)",
level_name[level], other_free * page_k, other_file * page_k,
(long)lowmem_minfree[lowmem_targets_size - 1] * page_k);
}
return;
}

if (enhance_batch_kill) {
// Kill one process at a time.
pages_to_free = 0;
} else {
/* Original minfree logic */
/* Free up enough pages to push over the highest minfree level */
pages_to_free = lowmem_minfree[lowmem_targets_size - 1] -
((other_free < other_file) ? other_free : other_file);
}
goto do_kill;
}

和内核逻辑一样,查看当前剩余内存是否在minfree分段范围内,如果在,那么就把这个minfree对应的adj给到min_score_adj。那要释放多少内存呢?

高通默认关闭了多个任务的逻辑,只允许杀一个进程:

if (enhance_batch_kill) {
// Kill one process at a time.
pages_to_free = 0;
} else {
/* Original minfree logic */
/* Free up enough pages to push over the highest minfree level */
pages_to_free = lowmem_minfree[lowmem_targets_size - 1] -
((other_free < other_file) ? other_free : other_file);
}

如果要杀多个进程,就是把那个距离minfree的差值要释放出来。

另外,高通加的这段:

// Adaptive LMK
if (enable_adaptive_lmk && level == VMPRESS_LEVEL_CRITICAL &&
i > lowmem_targets_size-4) {
min_score_adj = lowmem_adj[i-1];
}

就是在critical内存特紧张时,可以杀更低一级的adj的task。

ok,下来看下计算swapping是否频繁:

if ((mem_usage = get_memory_usage(&mem_usage_file_data)) < 0) {
goto do_kill;
}
if ((memsw_usage = get_memory_usage(&memsw_usage_file_data)) < 0) {
goto do_kill;
}

// Calculate percent for swappinness.
mem_pressure = (mem_usage * 100) / memsw_usage;

如果读取memory usage失败(返回值小于0),就直接去do_kill了;swapping情况从memcg处获得。

下来根据swapping情况判断是否要upgrade or downgrade level,默认关闭了, go上使能。

if (enable_pressure_upgrade && level != VMPRESS_LEVEL_CRITICAL) {
// We are swapping too much.
if (mem_pressure < upgrade_pressure) {
level = upgrade_level(level);
if (debug_process_killing) {
ALOGI("Event upgraded to %s", level_name[level]);
}
}
}

// If the pressure is larger than downgrade_pressure lmk will not
// kill any process, since enough memory is available.
if (mem_pressure > downgrade_pressure) {
if (debug_process_killing) {
ALOGI("Ignore %s memory pressure", level_name[level]);
}
return;
} else if (level == VMPRESS_LEVEL_CRITICAL &&
mem_pressure > upgrade_pressure) {
if (debug_process_killing) {
ALOGI("Downgrade critical memory pressure");
}
// Downgrade event, since enough memory available.
level = downgrade_level(level);
}

以前的文章已经分析过。

下来看do_kill:

do_kill:
if (low_ram_device) {
/* For Go devices kill only one task */
if (find_and_kill_processes(level, level_oomadj[level], 0) == 0) {
if (debug_process_killing) {
ALOGI("Nothing to kill");
}
}
} else {

可见,对low ram设备,直接找个杀掉完了。对high ram,如果用了minfree直接杀,没用minfree来看下:

if (!use_minfree_levels) {
/* If pressure level is less than critical and enough free swap then ignore */
if (level < VMPRESS_LEVEL_CRITICAL &&
mi.field.free_swap > low_pressure_mem.max_nr_free_pages) {
if (debug_process_killing) {
ALOGI("Ignoring pressure since %" PRId64
" swap pages are available ",
mi.field.free_swap);
}
return;
}
/* Free up enough memory to downgrate the memory pressure to low level */
if (mi.field.nr_free_pages < low_pressure_mem.max_nr_free_pages) {
pages_to_free = low_pressure_mem.max_nr_free_pages -
mi.field.nr_free_pages;
} else {
if (debug_process_killing) {
ALOGI("Ignoring pressure since more memory is "
"available (%" PRId64 ") than watermark (%" PRId64 ")",
mi.field.nr_free_pages, low_pressure_mem.max_nr_free_pages);
}
return;
}
min_score_adj = level_oomadj[level];
}

if event level is low or medium, 那么看下如果有足够的free swap,就不杀了,如果没有就要释放enough memory到low pressure memory的free水平。rt?

关于procadjslot_list的insert是在data_sock的handle里处理,如下:

/* Maximum number of simultaneously supported data connections. */
#define MAX_DATA_CONN 2

/*
 * Socket event handler data: one entry for the control socket and one entry
 * per supported data connection.
 */
static struct sock_event_handler_info ctrl_sock;
static struct sock_event_handler_info data_sock[MAX_DATA_CONN];

/*
 * Upper bound on epoll events: 1 control listen socket, MAX_DATA_CONN data
 * sockets, and one event per memory pressure level (VMPRESS_LEVEL_COUNT).
 */
#define MAX_EPOLL_EVENTS (1 + MAX_DATA_CONN + VMPRESS_LEVEL_COUNT)
/* NOTE(review): presumably the epoll instance descriptor - confirm at its initialization. */
static int epollfd;
/* NOTE(review): presumably the number of events registered with epoll - confirm at its use. */
static int maxevents;

说的就是socket event handler,这两个连接一个是原来的for ActivityManager,一个是for lmk test process, mark to check later。