QCOM安卓armv7 smp平台, kernel 4.x。死机时dmesg:
[ 4.790798] Unable to handle kernel NULL pointer dereference at virtual address 000001e0 [ 4.790803] pgd = edfd65c2 [ 4.790807] [000001e0] *pgd=00000000 [ 4.790813] Internal error: Oops: 5 [#1] PREEMPT SMP ARM [ 4.790817] Modules linked in: [ 4.790822] CPU: 2 PID: 300 Comm: kworker/2:2 Not tainted 4.x [ 4.790838] Workqueue: events monitor_soc_work [ 4.790841] task: 82e93681 task.stack: be01efa9 [ 4.790848] PC is at _raw_spin_lock_irqsave+0x30/0x70 [ 4.790854] LR is at preempt_count_add+0xa4/0xb8 [ 4.790859] pc : [<c10638c8>] lr : [<c0168194>] psr: 40000193 [ 4.790859] sp : e9707d80 ip : e9707d60 fp : e9707d94 [ 4.790861] r10: c1909248 r9 : e9707e48 r8 : 00000000 [ 4.790864] r7 : e972d9e4 r6 : 00000020 r5 : 60000113 r4 : 000001e0 [ 4.790866] r3 : c1909a68 r2 : 00000001 r1 : 00000000 r0 : 00000000 [ 4.790870] Flags: nZcv IRQs off FIQs on Mode SVC_32 ISA ARM Segment none [ 4.790873] Control: 10c0383d Table: 8000406a DAC: 00000051
QCOM私有的那个分析工具的Call Stacks就看不出来,Red Hat的crash的bt提示不能work:
crash.arm> bt PID: 300 TASK: e9698e80 CPU: 2 COMMAND: "kworker/2:2" bt: WARNING: cannot determine starting stack frame for task e9698e80
Why? mark to check later.
用QCOM的ramparser继续check:
Parsing CPU2 context start aab41000 end aab41800 Core 2 PC: _raw_spin_lock+4c <c106350c> Core 2 LR: _raw_spin_lock+20 <c10634e0> [<c106350c>] _raw_spin_lock+0x4c [<c105bfa0>] __schedule+0xc0 [<c105cac0>] schedule+0x58 [<c013794c>] do_exit+0x95c [<c010ea9c>] die+0x248 [<c0121c90>] __do_kernel_fault.part.0+0x74 [<c1064e6c>] do_page_fault+0x44c [<c010130c>] do_DataAbort+0x58 [<c10640bc>] __dabt_svc+0x5c [<c01584dc>] kthread_data+0x18 [<c0151cec>] wq_worker_sleeping+0x18 [<c105c2d4>] __schedule+0x3f4 [<c016fa08>] do_task_dead+0xa4 [<c0137808>] do_exit+0x818 [<c010ea9c>] die+0x248 [<c0121c90>] __do_kernel_fault.part.0+0x74 [<c1064e6c>] do_page_fault+0x44c [<c1064f88>] do_translation_fault+0x110 [<c010130c>] do_DataAbort+0x58 [<c10640bc>] __dabt_svc+0x5c [<c10638c8>] _raw_spin_lock_irqsave+0x30 [<c0aee46c>] power_supply_changed+0x3c [<c0b66510>] qpnp_batt_power_set_property+0xf0 [<c0aee330>] power_supply_set_property+0x34 [<c0b55760>] report_eoc+0x90 [<c0b579d4>] report_vm_bms_soc+0x7c8 [<c0b5a398>] monitor_soc_work+0x708 [<c014f814>] process_one_work+0x184 [<c014fc20>] worker_thread+0x54 [<c0157720>] kthread+0x128 [<c0109774>] ret_from_fork+0x14
ok, 以上就是panic时call stack。先来看c code:
113 void power_supply_changed (struct power_supply *psy) 114{ 115 unsigned long flags;116 117 dev_dbg(&psy->dev, "%s\n" , __func__);118 119 spin_lock_irqsave(&psy->changed_lock, flags); 120 psy->changed = true ;121 pm_stay_awake(&psy->dev);122 spin_unlock_irqrestore(&psy->changed_lock, flags);123 schedule_work(&psy->changed_work);124 }125 EXPORT_SYMBOL_GPL(power_supply_changed);
看name,好像是死在spin_lock_irqsave()
里了。来看看是不是:
spin_lock_irqsave()
:
335 #define spin_lock_irqsave(lock, flags) \ 336do { \ 337 raw_spin_lock_irqsave(spinlock_check(lock), flags); \ 338} while (0)
spinlock_check()
:
289 static __always_inline raw_spinlock_t *spinlock_check (spinlock_t *lock) 290{ 291 return &lock->rlock;292 }
raw_spin_lock_irqsave()
:
203 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) 204 205 #define raw_spin_lock_irqsave(lock, flags) \ 206 do { \ 207 typecheck(unsigned long, flags); \ 208 flags = _raw_spin_lock_irqsave(lock); \ 209 } while (0)
_raw_spin_lock_irqsave()
:
156 #ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE 157u nsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)158 {159 return __raw_spin_lock_irqsave(lock);160 }161 EXPORT_SYMBOL(_raw_spin_lock_irqsave);162 #endif
ps: CONFIG_INLINE_SPIN_LOCK_IRQSAVE
未配置。
ok,_raw_spin_lock_irqsave
出现了,正如我们预料的。继续看:
__raw_spin_lock_irqsave()
:
#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock){ unsigned long flags; local_irq_save(flags); preempt_disable(); spin_acquire(&lock->dep_map, 0 , 0 , _RET_IP_); #ifdef CONFIG_LOCKDEP LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); #else do_raw_spin_lock_flags(lock, &flags); #endif return flags; }
ok。我们来看死机现场asm code:
[ 4.790848] PC is at _raw_spin_lock_irqsave+0x30/0x70 ... [ 4.790859] pc : [<c10638c8>]
[<c10638c8>] _raw_spin_lock_irqsave+0x30
不用objdump整个vmlinux,这时crash就派上用场了,直接dis就可以:
crash.arm> dis _raw_spin_lock_irqsave 0xc1063898 <_raw_spin_lock_irqsave>: mov r12, sp 0xc106389c <_raw_spin_lock_irqsave+4>: push {r4, r5, r11, r12, lr, pc} 0xc10638a0 <_raw_spin_lock_irqsave+8>: sub r11, r12, #4 0xc10638a4 <_raw_spin_lock_irqsave+12>: push {lr} ; (str lr, [sp, #-4]!) 0xc10638a8 <_raw_spin_lock_irqsave+16>: ldmfd sp!, {lr} 0xc10638ac <_raw_spin_lock_irqsave+20>: mov r4, r0 0xc10638b0 <_raw_spin_lock_irqsave+24>: mrs r5, CPSR 0xc10638b4 <_raw_spin_lock_irqsave+28>: cpsid i 0xc10638b8 <_raw_spin_lock_irqsave+32>: bl 0xc0239fe0 <trace_hardirqs_off> 0xc10638bc <_raw_spin_lock_irqsave+36>: mov r0, #1 0xc10638c0 <_raw_spin_lock_irqsave+40>: bl 0xc01680f0 <preempt_count_add> 0xc10638c4 <_raw_spin_lock_irqsave+44>: pldw [r4] 0xc10638c8 <_raw_spin_lock_irqsave+48>: ldrex r3, [r4] //tj: pc 0xc10638cc <_raw_spin_lock_irqsave+52>: add r2, r3, #65536 ; 0x10000 0xc10638d0 <_raw_spin_lock_irqsave+56>: strex r1, r2, [r4] 0xc10638d4 <_raw_spin_lock_irqsave+60>: teq r1, #0 0xc10638d8 <_raw_spin_lock_irqsave+64>: bne 0xc10638c8 <_raw_spin_lock_irqsave+48> 0xc10638dc <_raw_spin_lock_irqsave+68>: lsr r2, r3, #16 0xc10638e0 <_raw_spin_lock_irqsave+72>: uxth r3, r3 0xc10638e4 <_raw_spin_lock_irqsave+76>: cmp r2, r3 0xc10638e8 <_raw_spin_lock_irqsave+80>: beq 0xc10638fc <_raw_spin_lock_irqsave+100> 0xc10638ec <_raw_spin_lock_irqsave+84>: wfe 0xc10638f0 <_raw_spin_lock_irqsave+88>: ldrh r3, [r4] 0xc10638f4 <_raw_spin_lock_irqsave+92>: cmp r2, r3 0xc10638f8 <_raw_spin_lock_irqsave+96>: bne 0xc10638ec <_raw_spin_lock_irqsave+84> 0xc10638fc <_raw_spin_lock_irqsave+100>: dmb ish 0xc1063900 <_raw_spin_lock_irqsave+104>: mov r0, r5 0xc1063904 <_raw_spin_lock_irqsave+108>: ldm sp, {r4, r5, r11, sp, pc} crash.arm>
pc:
0xc10638c8 <_raw_spin_lock_irqsave+48>: ldrex r3, [r4] //tj: pc
现场r4是:
r4就来源r0:
0xc10638ac <_raw_spin_lock_irqsave+20>: mov r4, r0
r0就是_raw_spin_lock_irqsave()
的入参喽。前面看过c code了:
157u nsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
这个lock
是000001e0? 不是[ 4.790798] Unable to handle kernel NULL pointer dereference at virtual address 000001e0
吗?哪里有0?
看下NULL pointer
由来:
131 static void 132 __do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,133 struct pt_regs *regs)134 {135 138 if (fixup_exception(regs))139 return ;140 141 144 bust_spinlocks(1 );145 pr_alert("Unable to handle kernel %s at virtual address %08lx\n" ,146 (addr < PAGE_SIZE) ? "NULL pointer dereference" :147 "paging request" , addr);
好嘛,只要addr < PAGE_SIZE
就是NULL pointer
了。那_raw_spin_lock_irqsave(raw_spinlock_t *lock)
入参lock
是不是000001e0?继续分析:
这个lock
来源:
335 #define spin_lock_irqsave(lock, flags) \ 336do { \ 337 raw_spin_lock_irqsave(spinlock_check(lock), flags); \ 338} while (0)
spinlock_check
的反汇编check下:
crash.arm> dis spinlock_check symbol not found: spinlock_check possible alternatives: (none found)
应该是inline的原因:
289 static __always_inline raw_spinlock_t *spinlock_check (spinlock_t *lock) 290{ 291 return &lock->rlock;292 }
spinlock_t结构:
typedef struct spinlock { union { struct raw_spinlock rlock ; }; } spinlock_t ;
继续看caller:
113 void power_supply_changed (struct power_supply *psy) 114{ 115 unsigned long flags;116 117 dev_dbg(&psy->dev, "%s\n" , __func__);118 119 spin_lock_irqsave(&psy->changed_lock, flags);
so,这里的&psy->changed_lock
是000001e0吗? 继续反汇编分析:
版权声明: 本站所有文章均采用 CC BY-NC-SA 4.0 CN 许可协议。转载请注明原文链接!