QCOM安卓armv7 smp平台, kernel 4.x。死机时dmesg:

[    4.790798] Unable to handle kernel NULL pointer dereference at virtual address 000001e0
[ 4.790803] pgd = edfd65c2
[ 4.790807] [000001e0] *pgd=00000000
[ 4.790813] Internal error: Oops: 5 [#1] PREEMPT SMP ARM
[ 4.790817] Modules linked in:
[ 4.790822] CPU: 2 PID: 300 Comm: kworker/2:2 Not tainted 4.x
[ 4.790838] Workqueue: events monitor_soc_work
[ 4.790841] task: 82e93681 task.stack: be01efa9
[ 4.790848] PC is at _raw_spin_lock_irqsave+0x30/0x70
[ 4.790854] LR is at preempt_count_add+0xa4/0xb8
[ 4.790859] pc : [<c10638c8>] lr : [<c0168194>] psr: 40000193
[ 4.790859] sp : e9707d80 ip : e9707d60 fp : e9707d94
[ 4.790861] r10: c1909248 r9 : e9707e48 r8 : 00000000
[ 4.790864] r7 : e972d9e4 r6 : 00000020 r5 : 60000113 r4 : 000001e0
[ 4.790866] r3 : c1909a68 r2 : 00000001 r1 : 00000000 r0 : 00000000
[ 4.790870] Flags: nZcv IRQs off FIQs on Mode SVC_32 ISA ARM Segment none
[ 4.790873] Control: 10c0383d Table: 8000406a DAC: 00000051

QCOM私有的那个分析工具的Call Stacks就看不出来,Red Hat的crash的bt提示不能work:

crash.arm> bt
PID: 300 TASK: e9698e80 CPU: 2 COMMAND: "kworker/2:2"
bt: WARNING: cannot determine starting stack frame for task e9698e80

Why? mark to check later.

用QCOM的ramparser继续check:

Parsing CPU2 context start aab41000 end aab41800
Core 2 PC: _raw_spin_lock+4c <c106350c>
Core 2 LR: _raw_spin_lock+20 <c10634e0>

[<c106350c>] _raw_spin_lock+0x4c
[<c105bfa0>] __schedule+0xc0
[<c105cac0>] schedule+0x58
[<c013794c>] do_exit+0x95c
[<c010ea9c>] die+0x248
[<c0121c90>] __do_kernel_fault.part.0+0x74
[<c1064e6c>] do_page_fault+0x44c
[<c010130c>] do_DataAbort+0x58
[<c10640bc>] __dabt_svc+0x5c
[<c01584dc>] kthread_data+0x18
[<c0151cec>] wq_worker_sleeping+0x18
[<c105c2d4>] __schedule+0x3f4
[<c016fa08>] do_task_dead+0xa4
[<c0137808>] do_exit+0x818
[<c010ea9c>] die+0x248
[<c0121c90>] __do_kernel_fault.part.0+0x74
[<c1064e6c>] do_page_fault+0x44c
[<c1064f88>] do_translation_fault+0x110
[<c010130c>] do_DataAbort+0x58
[<c10640bc>] __dabt_svc+0x5c
[<c10638c8>] _raw_spin_lock_irqsave+0x30
[<c0aee46c>] power_supply_changed+0x3c
[<c0b66510>] qpnp_batt_power_set_property+0xf0
[<c0aee330>] power_supply_set_property+0x34
[<c0b55760>] report_eoc+0x90
[<c0b579d4>] report_vm_bms_soc+0x7c8
[<c0b5a398>] monitor_soc_work+0x708
[<c014f814>] process_one_work+0x184
[<c014fc20>] worker_thread+0x54
[<c0157720>] kthread+0x128
[<c0109774>] ret_from_fork+0x14

ok, 以上就是panic时call stack。先来看c code:

113void power_supply_changed(struct power_supply *psy)
114{
115 unsigned long flags;
116
117 dev_dbg(&psy->dev, "%s\n", __func__);
118
119 spin_lock_irqsave(&psy->changed_lock, flags); //tj: here
120 psy->changed = true;
121 pm_stay_awake(&psy->dev);
122 spin_unlock_irqrestore(&psy->changed_lock, flags);
123 schedule_work(&psy->changed_work);
124}
125EXPORT_SYMBOL_GPL(power_supply_changed);

看name,好像是死在spin_lock_irqsave()里了。来看看是不是:

spin_lock_irqsave():

335#define spin_lock_irqsave(lock, flags)				\
336do { \
337 raw_spin_lock_irqsave(spinlock_check(lock), flags); \
338} while (0)

spinlock_check():

289static __always_inline raw_spinlock_t *spinlock_check(spinlock_t *lock)
290{
291 return &lock->rlock;
292}

raw_spin_lock_irqsave():

203#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
204
205#define raw_spin_lock_irqsave(lock, flags) \
206 do { \
207 typecheck(unsigned long, flags); \
208 flags = _raw_spin_lock_irqsave(lock); \
209 } while (0)

_raw_spin_lock_irqsave():

156#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE
157unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
158{
159 return __raw_spin_lock_irqsave(lock);
160}
161EXPORT_SYMBOL(_raw_spin_lock_irqsave);
162#endif

ps: CONFIG_INLINE_SPIN_LOCK_IRQSAVE未配置。

ok,_raw_spin_lock_irqsave出现了,正如我们预料的。继续看:

__raw_spin_lock_irqsave():

/*
* If lockdep is enabled then we use the non-preemption spin-ops
* even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
* not re-enabled during lock-acquire (which the preempt-spin-ops do):
*/
#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)

static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock)
{
unsigned long flags;

local_irq_save(flags);
preempt_disable();
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
/*
* On lockdep we dont want the hand-coded irq-enable of
* do_raw_spin_lock_flags() code, because lockdep assumes
* that interrupts are not re-enabled during lock-acquire:
*/
#ifdef CONFIG_LOCKDEP
LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
#else
do_raw_spin_lock_flags(lock, &flags);
#endif
return flags;
}

ok。我们来看死机现场asm code:

[    4.790848] PC is at _raw_spin_lock_irqsave+0x30/0x70
...
[ 4.790859] pc : [<c10638c8>]
[<c10638c8>] _raw_spin_lock_irqsave+0x30

不用objdump整个vmlinux,这时crash就派上用场了,直接dis就可以:

crash.arm> dis _raw_spin_lock_irqsave
0xc1063898 <_raw_spin_lock_irqsave>: mov r12, sp
0xc106389c <_raw_spin_lock_irqsave+4>: push {r4, r5, r11, r12, lr, pc}
0xc10638a0 <_raw_spin_lock_irqsave+8>: sub r11, r12, #4
0xc10638a4 <_raw_spin_lock_irqsave+12>: push {lr} ; (str lr, [sp, #-4]!)
0xc10638a8 <_raw_spin_lock_irqsave+16>: ldmfd sp!, {lr}
0xc10638ac <_raw_spin_lock_irqsave+20>: mov r4, r0
0xc10638b0 <_raw_spin_lock_irqsave+24>: mrs r5, CPSR
0xc10638b4 <_raw_spin_lock_irqsave+28>: cpsid i
0xc10638b8 <_raw_spin_lock_irqsave+32>: bl 0xc0239fe0 <trace_hardirqs_off>
0xc10638bc <_raw_spin_lock_irqsave+36>: mov r0, #1
0xc10638c0 <_raw_spin_lock_irqsave+40>: bl 0xc01680f0 <preempt_count_add>
0xc10638c4 <_raw_spin_lock_irqsave+44>: pldw [r4]
0xc10638c8 <_raw_spin_lock_irqsave+48>: ldrex r3, [r4] //tj: pc
0xc10638cc <_raw_spin_lock_irqsave+52>: add r2, r3, #65536 ; 0x10000
0xc10638d0 <_raw_spin_lock_irqsave+56>: strex r1, r2, [r4]
0xc10638d4 <_raw_spin_lock_irqsave+60>: teq r1, #0
0xc10638d8 <_raw_spin_lock_irqsave+64>: bne 0xc10638c8 <_raw_spin_lock_irqsave+48>
0xc10638dc <_raw_spin_lock_irqsave+68>: lsr r2, r3, #16
0xc10638e0 <_raw_spin_lock_irqsave+72>: uxth r3, r3
0xc10638e4 <_raw_spin_lock_irqsave+76>: cmp r2, r3
0xc10638e8 <_raw_spin_lock_irqsave+80>: beq 0xc10638fc <_raw_spin_lock_irqsave+100>
0xc10638ec <_raw_spin_lock_irqsave+84>: wfe
0xc10638f0 <_raw_spin_lock_irqsave+88>: ldrh r3, [r4]
0xc10638f4 <_raw_spin_lock_irqsave+92>: cmp r2, r3
0xc10638f8 <_raw_spin_lock_irqsave+96>: bne 0xc10638ec <_raw_spin_lock_irqsave+84>
0xc10638fc <_raw_spin_lock_irqsave+100>: dmb ish
0xc1063900 <_raw_spin_lock_irqsave+104>: mov r0, r5
0xc1063904 <_raw_spin_lock_irqsave+108>: ldm sp, {r4, r5, r11, sp, pc}
crash.arm>

pc:

0xc10638c8 <_raw_spin_lock_irqsave+48>: ldrex   r3, [r4] //tj: pc

现场r4是:

r4 : 000001e0

r4就来源r0:

0xc10638ac <_raw_spin_lock_irqsave+20>: mov     r4, r0

r0就是_raw_spin_lock_irqsave()的入参喽。前面看过c code了:

157unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)

这个lock是000001e0? 不是[ 4.790798] Unable to handle kernel NULL pointer dereference at virtual address 000001e0吗?哪里有0?

看下NULL pointer由来:

131static void
132__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
133 struct pt_regs *regs)
134{
135 /*
136 * Are we prepared to handle this kernel fault?
137 */
138 if (fixup_exception(regs))
139 return;
140
141 /*
142 * No handler, we'll have to terminate things with extreme prejudice.
143 */
144 bust_spinlocks(1);
145 pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
146 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
147 "paging request", addr);

好嘛,只要addr < PAGE_SIZE就是NULL pointer了。那_raw_spin_lock_irqsave(raw_spinlock_t *lock)入参lock是不是000001e0?继续分析:

这个lock来源:

335#define spin_lock_irqsave(lock, flags)				\
336do { \
337 raw_spin_lock_irqsave(spinlock_check(lock), flags); \
338} while (0)

spinlock_check的反汇编check下:

crash.arm> dis spinlock_check
symbol not found: spinlock_check
possible alternatives:
(none found)

应该是inline的原因:

289static __always_inline raw_spinlock_t *spinlock_check(spinlock_t *lock)
290{
291 return &lock->rlock;
292}

spinlock_t结构:

typedef struct spinlock {
union {
struct raw_spinlock rlock;
};
} spinlock_t;

继续看caller:

113void power_supply_changed(struct power_supply *psy)
114{
115 unsigned long flags;
116
117 dev_dbg(&psy->dev, "%s\n", __func__);
118
119 spin_lock_irqsave(&psy->changed_lock, flags); //tj: here

so,这里的&psy->changed_lock是000001e0吗? 继续反汇编分析: