问题 workqueue name 就是 alloc_workqueue
的第一个参数:
前文已经说过如何使用 perf sched
来统计 task 调度延迟,但是如下图 workqueue name 被剪掉了:
----------------------------------------------------------------------------------------------------------------- Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at | ----------------------------------------------------------------------------------------------------------------- kworker/4:2-eve:404 | 88.386 ms | 207 | avg: 0.248 ms | max: 12.380 ms | max at: 271.002611 s kworker/u16:18-:1499 | 36.641 ms | 681 | avg: 0.081 ms | max: 4.086 ms | max at: 269.173878 s kworker/u16:17-:1498 | 21.918 ms | 511 | avg: 0.080 ms | max: 2.706 ms | max at: 272.811798 s kworker/u16:19-:1500 | 41.742 ms | 713 | avg: 0.077 ms | max: 4.098 ms | max at: 272.342235 s
那如何显示完整name了?我们直接看 perf 源码(kernel 4.19)。
perf 源码分析 打印入口:
static int perf_sched__lat (struct perf_sched *sched) { struct rb_node *next ; setup_pager(); if (perf_sched__read_events(sched)) return -1 ; perf_sched__merge_lat(sched); perf_sched__sort_lat(sched); printf ("\n -----------------------------------------------------------------------------------------------------------------\n" ); printf (" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n" ); printf (" -----------------------------------------------------------------------------------------------------------------\n" ); next = rb_first(&sched->sorted_atom_root); while (next) { struct work_atoms *work_list ; work_list = rb_entry(next, struct work_atoms, node); output_lat_thread(sched, work_list); next = rb_next(next); thread__zput(work_list->thread); }
应该是 output_lat_thread()
:
static void output_lat_thread (struct perf_sched *sched, struct work_atoms *work_list) { int i; int ret; u64 avg; char max_lat_at[32 ]; if (!work_list->nb_atoms) return ; if (!strcmp (thread__comm_str(work_list->thread), "swapper" )) return ; sched->all_runtime += work_list->total_runtime; sched->all_count += work_list->nb_atoms; if (work_list->num_merged > 1 ) ret = printf (" %s:(%d) " , thread__comm_str(work_list->thread), work_list->num_merged); else ret = printf (" %s:%d " , thread__comm_str(work_list->thread), work_list->thread->tid); for (i = 0 ; i < 24 - ret; i++)
所以,那个 workqueue name 应该是来自 thread__comm_str(work_list->thread)
。
const char *thread__comm_str (const struct thread *thread) { const char *str; down_read((struct rw_semaphore *)&thread->comm_lock); str = __thread__comm_str(thread); up_read((struct rw_semaphore *)&thread->comm_lock); return str; }
static const char *__thread__comm_str(const struct thread *thread){ const struct comm *comm = thread__comm(thread); if (!comm) return NULL ; return comm__str(comm); }
struct comm *thread__comm (const struct thread *thread) { if (list_empty(&thread->comm_list)) return NULL ; return list_first_entry(&thread->comm_list, struct comm, list ); }
啊,原来是thread
里有个链表 comm_list
,从这个表里取出comm
-> comm__str(comm)
:
const char *comm__str (const struct comm *comm) { return comm->comm_str->str; }
大概看下 struct comm
:
struct comm_str ;struct comm { struct comm_str *comm_str ; u64 start; struct list_head list ; bool exec; union { void *priv; u64 db_id; }; };
util/comm.c里:
struct comm_str { char *str; struct rb_node rb_node ; refcount_t refcnt; };
能看到 str
是一个 char * 的指针,那啥时候分配的?分配里不是有大小吗?这个comm.c文件里就有分配实现:
static struct comm_str *comm_str__alloc (const char *str) { struct comm_str *cs ; cs = zalloc(sizeof (*cs)); if (!cs) return NULL ; cs->str = strdup(str); if (!cs->str) { free (cs); return NULL ; } refcount_set(&cs->refcnt, 1 ); return cs; }
是按照入参str
的长度来分配的,继续跟。
有两个caller: comm__new
和 comm__override
,看下 comm__new
:
struct thread *thread__new (pid_t pid, pid_t tid) { char *comm_str; struct comm *comm ; struct thread *thread = zalloc(sizeof (*thread)); if (thread != NULL ) { thread->pid_ = pid; thread->tid = tid; thread->ppid = -1 ; thread->cpu = -1 ; INIT_LIST_HEAD(&thread->namespaces_list); INIT_LIST_HEAD(&thread->comm_list); init_rwsem(&thread->namespaces_lock); init_rwsem(&thread->comm_lock); comm_str = malloc (32 ); if (!comm_str) goto err_thread; snprintf (comm_str, 32 , ":%d" , tid); comm = comm__new(comm_str, 0 , false ); free (comm_str); if (!comm) goto err_thread; list_add(&comm->list , &thread->comm_list);
分配了32个字节,应该够了,那实际用了多少?也就是这个 str
是如何被填入的,那就是 sched record
喽。回头再看下相关code:
struct work_atom { struct list_head list ; enum thread_state state ; u64 sched_out_time; u64 wake_up_time; u64 sched_in_time; u64 runtime; }; struct work_atoms { struct list_head work_list ; struct thread *thread ; struct rb_node node ; u64 max_lat; u64 max_lat_at; u64 total_lat; u64 nb_atoms; u64 total_runtime; int num_merged; };
每一个 struct work_atom
定义的work都有相应的行为成员,work_atom
的list
用来关联到work_atoms
。
list_add_tail(&atom->list , &atoms->work_list);
看下work_atoms
里的thread
,这个 thread
里有个 comm_list
,除了上文提到的thread_new()
会add 一个comm到这个list以外,还有:
static int ____thread__set_comm(struct thread *thread, const char *str, u64 timestamp, bool exec) { struct comm *new , *curr = thread__comm(thread); if (!thread->comm_set) { int err = comm__override(curr, str, timestamp, exec); if (err) return err; } else { new = comm__new(str, timestamp, exec); if (!new) return -ENOMEM; list_add(&new->list , &thread->comm_list); if (exec) unwind__flush_access(thread); } thread->comm_set = true ; return 0 ; }
if comm_set
过了,就new一个comm。read这个set code的分支有点多,我们借用dump_stack()
看下运行时:
+++ b/tools/perf/util/thread.c @@ -218 ,6 +218 ,8 @@ static int ____thread__set_comm(struct thread *thread, const char *str, { struct comm *new, *curr = thread__comm(thread); + dump_stack();
能追到:
cmd_record > perf_session__process_events -> perf_session__deliver_event ->machines__deliver_event-> machine__process_comm_event
前面几个都ok,中间有个machines__deliver_event是怎么调到machine__process_comm_event的呢?来看下。
static int machines__deliver_event (struct machines *machines, struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample, struct perf_tool *tool, u64 file_offset) { [...] switch (event->header.type) { [...] case PERF_RECORD_COMM: return tool->comm(tool, event, sample, machine);
应该是这里,用了一个callback,看下builtin-sched.c/cmd sched:
int cmd_sched (int argc, const char **argv) { const char default_sort_order[] = "avg, max, switch, runtime" ; struct perf_sched sched = { .tool = { .sample = perf_sched__process_tracepoint_sample, .comm = perf_sched__process_comm,
see? 这里有个tool的comm (perf_sched__process_comm
),直觉告诉我就是它:)
static int perf_sched__process_comm (struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) { struct thread *thread ; struct thread_runtime *tr ; int err; err = perf_event__process_comm(tool, event, sample, machine); if (err)
int perf_event__process_comm (struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, struct machine *machine) { return machine__process_comm_event(machine, event, sample); }
rt? 到这里整个call stack就清楚了。现在应该知道怎么改了,如果还不知道,check below:
版权声明: 本站所有文章均采用 CC BY-NC-SA 4.0 CN 许可协议。转载请注明原文链接!