项目经验之谈--驱动崩溃分析之栈回溯技术与反汇编

    xiaoxiao2021-09-21  29

    1.序言

    驱动往往是芯片厂商提供的,而且是不开源的。 一旦崩溃很难查找原因,当然办法是有的,比如内核为此也提供栈回溯技术(低版本的好像没有实现)来定位分析驱动问题。再不济也可以反汇编ko文件。

    2.栈回溯

    栈回溯技术由来已久,已经不是什么新鲜的词汇了,但无论是Linux kernel、驱动还是应用开发,它在开发、调试当中都非常重要。 如果掌握了栈回溯技术,对程序员职业生涯有着非同寻常的意义。只要涉及到栈,就难免跟体系架构分不开。 无论是ARM架构、X86架构还是MIPS,其实现栈回溯的方法肯定是不同的。Linux2.6早期的版本还未实现栈回溯,后期的版本都已经实现。 Linux kernel : 内核arch相关目录下有栈回溯的实现代码,关键函数是dump_stack。 Linux app:应用程序则需要调用第三方库来使用栈回溯的功能,如glibc。

    ⑴Linux kernel 实现

    NOTE:该部分代码引用自Linux3.8.13 void dump_stack(void) { dump_backtrace(NULL, NULL); } static inline void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { unwind_backtrace(regs, tsk); } void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; register unsigned long current_sp asm ("sp"); pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); if (!tsk) tsk = current; if (regs) { frame.fp = regs->ARM_fp; frame.sp = regs->ARM_sp; frame.lr = regs->ARM_lr; /* PC might be corrupted, use LR in that case. */ frame.pc = kernel_text_address(regs->ARM_pc) ? regs->ARM_pc : regs->ARM_lr; } else if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_sp; frame.lr = (unsigned long)__builtin_return_address(0); frame.pc = (unsigned long)unwind_backtrace; } else { /* task blocked in __switch_to */ frame.fp = thread_saved_fp(tsk); frame.sp = thread_saved_sp(tsk); /* * The function calling __switch_to cannot be a leaf function * so LR is recovered from the stack. */ frame.lr = 0; frame.pc = thread_saved_pc(tsk); } while (1) { int urc; unsigned long where = frame.pc; urc = unwind_frame(&frame); if (urc < 0) break; dump_backtrace_entry(where, frame.pc, frame.sp - 4); } } void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame) { #ifdef CONFIG_KALLSYMS printk("[<%08lx>] (%pS) from [<%08lx>] (%pS)\n", where, (void *)where, from, (void *)from); #else printk("Function entered at [<%08lx>] from [<%08lx>]\n", where, from); #endif if (in_exception_text(where)) dump_mem("", "Exception stack", frame + 4, frame + 4 + sizeof(struct pt_regs)); } /* * Dump out the contents of some memory nicely... */ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, unsigned long top) { unsigned long first; mm_segment_t fs; int i; /* * We need to switch to kernel mode so that we can use __get_user * to safely read from kernel space. 
Note that we now dump the * code first, just in case the backtrace kills us. */ fs = get_fs(); set_fs(KERNEL_DS); printk("%s%s(0x%08lx to 0x%08lx)\n", lvl, str, bottom, top); for (first = bottom & ~31; first < top; first += 32) { unsigned long p; char str[sizeof(" 12345678") * 8 + 1]; memset(str, ' ', sizeof(str)); str[sizeof(str) - 1] = '\0'; for (p = first, i = 0; i < 8 && p < top; i++, p += 4) { if (p >= bottom && p < top) { unsigned long val; if (__get_user(val, (unsigned long *)p) == 0) sprintf(str + i * 9, " %08lx", val); else sprintf(str + i * 9, " ????????"); } } printk("%s%04lx:%s\n", lvl, first & 0xffff, str); } set_fs(fs); }

    ⑵glibc 实现

    NOTE : 该部分代码引自glibc-2.24

    <1> 测试用例

    static int compare (const void *p1, const void *p2) { void *ba[20]; int n = backtrace (ba, sizeof (ba) / sizeof (ba[0])); if (n != 0) { char **names = backtrace_symbols (ba, n); if (names != NULL) { int i; printf ("called from %s\n", names[0]); for (i = 1; i < n; ++i) printf (" %s\n", names[i]); free (names); } } return *(const uint32_t *) p1 - *(const uint32_t *) p2; } int main (int argc, char *argv[]) { uint32_t arr[20]; size_t cnt; for (cnt = 0; cnt < sizeof (arr) / sizeof (arr[0]); ++cnt) arr[cnt] = random (); qsort (arr, sizeof (arr) / sizeof (arr[0]), sizeof (arr[0]), compare); for (cnt = 0; cnt < sizeof (arr) / sizeof (arr[0]); ++cnt) printf ("%" PRIx32 "\n", arr[cnt]); return 0; }

    <2> backtrace 函数实现

    int __backtrace (void **array, int size) { struct layout *current; void *top_frame; void *top_stack; int cnt = 0; top_frame = FIRST_FRAME_POINTER; top_stack = CURRENT_STACK_FRAME; /* We skip the call to this function, it makes no sense to record it. */ current = ((struct layout *) top_frame); while (cnt < size) { if ((void *) current INNER_THAN top_stack || !((void *) current INNER_THAN __libc_stack_end)) /* This means the address is out of range. Note that for the toplevel we see a frame pointer with value NULL which clearly is out of range. */ break; array[cnt++] = current->return_address; current = ADVANCE_STACK_FRAME (current->next); } return cnt; } weak_alias (__backtrace, backtrace) libc_hidden_def (__backtrace)

    <3> backtrace_symbols 函数实现

    char ** __backtrace_symbols (void *const *array, int size) { Dl_info info[size]; int status[size]; int cnt; size_t total = 0; char **result; /* Fill in the information we can get from `dladdr'. */ for (cnt = 0; cnt < size; ++cnt) { struct link_map *map; status[cnt] = _dl_addr (array[cnt], &info[cnt], &map, NULL); if (status[cnt] && info[cnt].dli_fname && info[cnt].dli_fname[0] != '\0') { /* We have some info, compute the length of the string which will be "<file-name>(<sym-name>+offset) [address]. */ total += (strlen (info[cnt].dli_fname ?: "") + strlen (info[cnt].dli_sname ?: "") + 3 + WORD_WIDTH + 3 + WORD_WIDTH + 5); /* The load bias is more useful to the user than the load address. The use of these addresses is to calculate an address in the ELF file, so its prelinked bias is not something we want to subtract out. */ info[cnt].dli_fbase = (void *) map->l_addr; } else total += 5 + WORD_WIDTH; } /* Allocate memory for the result. */ result = (char **) malloc (size * sizeof (char *) + total); if (result != NULL) { char *last = (char *) (result + size); for (cnt = 0; cnt < size; ++cnt) { result[cnt] = last; if (status[cnt] && info[cnt].dli_fname != NULL && info[cnt].dli_fname[0] != '\0') { if (info[cnt].dli_sname == NULL) /* We found no symbol name to use, so describe it as relative to the file. 
*/ info[cnt].dli_saddr = info[cnt].dli_fbase; if (info[cnt].dli_sname == NULL && info[cnt].dli_saddr == 0) last += 1 + sprintf (last, "%s(%s) [%p]", info[cnt].dli_fname ?: "", info[cnt].dli_sname ?: "", array[cnt]); else { char sign; ptrdiff_t offset; if (array[cnt] >= (void *) info[cnt].dli_saddr) { sign = '+'; offset = array[cnt] - info[cnt].dli_saddr; } else { sign = '-'; offset = info[cnt].dli_saddr - array[cnt]; } last += 1 + sprintf (last, "%s(%s%c%#tx) [%p]", info[cnt].dli_fname ?: "", info[cnt].dli_sname ?: "", sign, offset, array[cnt]); } } else last += 1 + sprintf (last, "[%p]", array[cnt]); } assert (last <= (char *) result + size * sizeof (char *) + total); } return result; } weak_alias (__backtrace_symbols, backtrace_symbols)

    3.反汇编

    掌握了栈回溯技术,我们心里可能还是不那么踏实,毕竟驱动程序大部分是不开源的。没有源码就无法继续跟踪分析,仅凭栈信息也难以定位问题。这时我们可以利用反汇编技术来定位问题。

    objdump -D test.ko > test.ko.dis

    4. /proc文件系统

    4.1 查看模块加载的起始地址

    cat /proc/modules NV_Driver 75164 0 - Live 0xbf37d000 mt7601Usta 687475 1 - Live 0xbf2bc000 hi3518_adec 16147 1 - Live 0xbf2b5000 (P) hi3518_aenc 53068 1 - Live 0xbf2a4000 (P) hi3518_ao 40719 1 - Live 0xbf296000 (P) hi3518_ai 46478 1 - Live 0xbf285000 (P) hi3518_sio 12541 3 hi3518_ao,hi3518_ai, Live 0xbf27d000 (P) hidmac 13593 2 hi3518_ao,hi3518_ai, Live 0xbf275000 acodec 9611 0 - Live 0xbf26f000 (P) ssp_ad9020 4993 0 - Live 0xbf26a000 hi_rtc 4363 0 - Live 0xbf265000 pwm 1489 0 - Live 0xbf261000 hi_i2c 3792 0 - Live 0xbf25d000 hi3518_ive 32474 1 - Live 0xbf251000 (P) hi3518_vda 172473 1 - Live 0xbf220000 (P) hi3518_region 51571 1 - Live 0xbf20e000 (P) hi3518_rc 152468 1 - Live 0xbf1e1000 (P) hi3518_jpege 48313 1 - Live 0xbf1d0000 (P) hi3518_h264e 148654 1 - Live 0xbf1a5000 (P) hi3518_chnl 39965 1 - Live 0xbf196000 (P) hi3518_group 178766 1 - Live 0xbf165000 (P) hi3518_venc 87386 3 hi3518_jpege,hi3518_h264e, Live 0xbf14a000 (P) hi3518_vou 258761 1 - Live 0xbf0fa000 (P) hi3518_vpss 208550 1 - Live 0xbf0bf000 (P) hi3518_isp 21496 3 ssp_ad9020,hi_i2c, Live 0xbf0b5000 (P) hi3518_viu 178785 1 - Live 0xbf07e000 (P) hi3518_dsu 129358 1 - Live 0xbf059000 (P) hi3518_tde 117632 1 hi3518_dsu, Live 0xbf033000 (P) hiuser 890 2 hi3518_jpege,hi3518_h264e, Live 0xbf02f000 hi3518_sys 40932 2 hi3518_viu, Live 0xbf01f000 (P) hi3518_base 43832 21 hi3518_adec,hi3518_aenc,hi3518_ao,hi3518_ai,hi3518_sio,acodec,hi3518_ive,hi3518_vda,hi3518_region,hi3518_rc,hi3518_jpege,hi3518_h264e,hi3518_chnl,hi3518_group,hi3518_venc,hi3518_vou,hi3518_vpss,hi3518_isp,hi3518_viu,hi3518_dsu,hi3518_sys, Live 0xbf00e000 (P) mmz 19713 4 hi3518_aenc,hi3518_h264e,hi3518_tde,hi3518_base, Live 0xbf005000 wdt 4229 0 - Live 0xbf000000

    4.2 查看内核symbol表

    cat /proc/kallsyms

    5.例子

    <1>现象1 # ./sample_venc 0 please press twice ENTER to exit this sample Wed Nov 16 10:58:31 WAUST 2016 Get ISP Interrupt Failed with ec 0x1! Get ISP Interrupt Failed with ec 0x1! [SAMPLE_COMM_VENC_GetVencStreamProc]-920: get venc stream time out, exit thread Get ISP Interrupt Failed with ec 0x1! Wed Nov 16 10:59:04 WAUST 2016 Get ISP Interrupt Failed with ec 0x1! Get ISP Interrupt Failed with ec 0x1! [SAMPLE_COMM_VENC_GetVencStreamProc]-920: get venc stream time out, exit thread Get ISP Interrupt Failed with ec 0x1! Wed Nov 16 10:59:06 WAUST 2016 Get ISP Interrupt Failed with ec 0x1! Get ISP Interrupt Failed with ec 0x1! ================vedu 0 debug info============= ===debug info from 0000-0154=== 0000 : 00000000 00000000 00000000 00000000 0010 : 00000000 00000000 00000000 00000000 0020 : 00000000 00000000 00000000 00000000 0030 : 00000000 00000000 00000000 00000000 0040 : 00000000 00000000 00000000 00000000 0050 : 00000000 00000000 00000000 00000000 0060 : 00000000 00000000 00000000 00000000 0070 : 00000000 00000000 00000000 00000000 0080 : 00000000 00000000 00000000 00000000 0090 : 00000000 00000000 00000000 00000000 00a0 : 00000000 00000000 00000000 00000000 00b0 : 00000000 00000000 00000000 00000000 00c0 : 00000000 00000000 00000000 00000000 00d0 : 00000000 00000000 00000000 00000000 00e0 : 00000000 00000000 00000000 00000000 00f0 : 00000000 00000000 00000000 00000000 0100 : 00000000 00000000 00000000 00000000 0110 : 00000000 00000000 00000000 00000000 0120 : 00000000 00000000 00000000 00000000 0130 : 00000000 00000000 00000000 00000000 0140 : 00000000 00000000 00000000 00000000 0150 : 00000000 00000000 00000000 00000000 ===debug info from 0a50-0a9c=== 0a50 : 00000000 00000000 00000000 00000000 0a60 : 00000000 00000000 00000000 00000000 0a70 : 00000000 00000000 00000000 00000000 0a80 : 00000000 00000000 00000000 00000000 0a90 : 00000000 00000000 00000000 00000000 VEDU_0 :watchdog here. Bitatream channel error! 
s32ChnID : 2 ,pstNaluHead->s32ChnID : -825307441 Kernel panic - not syncing: ASSERT failed at: >File name: /home/pub/platform_h3/mpp/code/mkp/vedu/h264e/h264e_ext.c >Function : H264E_GetBitStream >Line No. : 1880 >Condition: pstNaluHead->s32ChnID == s32ChnID Backtrace: [<c0031ff4>] (dump_backtrace+0x0/0x10c) from [<c039c9c4>] (dump_stack+0x18/0x1c) r6:c16290d0 r5:c04aa810 r4:c04aa810 r3:0000000a [<c039c9ac>] (dump_stack+0x0/0x1c) from [<c039ca2c>] (panic+0x64/0x190) [<c039c9c8>] (panic+0x0/0x190) from [<bf1b6794>] (H264E_GetBitStream+0xa94/0xb18 [hi3518_h264e]) r3:00000758 r2:bf1c33f4 r1:bf1c4138 r0:bf1c40e0 r7:00002840 [<bf1b5d00>] (H264E_GetBitStream+0x0/0xb18 [hi3518_h264e]) from [<bf151c3c>] (VencPacktoVencBuf+0x3a4/0x6ac [hi3518_venc]) [<bf151898>] (VencPacktoVencBuf+0x0/0x6ac [hi3518_venc]) from [<bf151fa0>] (VENC_FrameOverNotify+0x5c/0x214 [hi3518_venc]) [<bf151f44>] (VENC_FrameOverNotify+0x0/0x214 [hi3518_venc]) from [<bf1b7f1c>] (H264E_IntProcess+0x1ec/0x4fc [hi3518_h264e]) r8:bf015e84 r7:00000001 r6:c1357b00 r5:c2918000 r4:00000002 [<bf1b7d30>] (H264E_IntProcess+0x0/0x4fc [hi3518_h264e]) from [<bf1672bc>] (GROUP_IntHandlerCallBack+0x174/0x270 [hi3518_group]) r8:41a64e76 r7:bf19d790 r6:bf1c6b8c r5:00000002 r4:bf1723b0 [<bf167148>] (GROUP_IntHandlerCallBack+0x0/0x270 [hi3518_group]) from [<bf199488>] (ChnlVeduIntHandler+0x2a0/0x6ac [hi3518_chnl]) r7:bf19d790 r6:c1357b50 r5:c2800350 r4:bf19d7e8 [<bf1991e8>] (ChnlVeduIntHandler+0x0/0x6ac [hi3518_chnl]) from [<bf199954>] (ChnlVeduISR+0xc0/0x154 [hi3518_chnl]) [<bf199894>] (ChnlVeduISR+0x0/0x154 [hi3518_chnl]) from [<c0071910>] (handle_irq_event_percpu+0x54/0x1b4) r6:00000000 r5:00000018 r4:c13ac7c0 [<c00718bc>] (handle_irq_event_percpu+0x0/0x1b4) from [<c0071aa0>] (handle_irq_event+0x30/0x40) [<c0071a70>] (handle_irq_event+0x0/0x40) from [<c0073d44>] (handle_level_irq+0x88/0xe4) r4:c1c05900 r3:c0492788 [<c0073cbc>] (handle_level_irq+0x0/0xe4) from [<c00718a0>] (generic_handle_irq+0x30/0x38) r4:00000018 
r3:c0073cbc [<c0071870>] (generic_handle_irq+0x0/0x38) from [<c0027038>] (asm_do_IRQ+0x38/0x8c) r4:c0499934 r3:00000020 [<c0027000>] (asm_do_IRQ+0x0/0x8c) from [<c002df34>] (__irq_svc+0x34/0xa0) Exception stack(0xc1357c50 to 0xc1357c98) 7c40: c1da3d20 00000010 80000280 80000d80 7c60: 00000000 ffdfd480 80000c80 ffdfd480 c1357d64 c1da76dc ffffffff c1357cb4 7c80: c1357cb8 c1357c98 c023e89c c023e170 60000013 ffffffff r6:00000001 r5:fe140000 r4:ffffffff r3:60000013 [<c023e15c>] (ehci_qtd_alloc+0x0/0x60) from [<c023e89c>] (qh_urb_transaction+0x2d4/0x458) r5:ffdfd480 r4:00000000 [<c023e5c8>] (qh_urb_transaction+0x0/0x458) from [<c023f940>] (ehci_urb_enqueue+0xa0/0xf94) [<c023f8a0>] (ehci_urb_enqueue+0x0/0xf94) from [<c02291c4>] (usb_hcd_submit_urb+0xc0/0x6bc) [<c0229104>] (usb_hcd_submit_urb+0x0/0x6bc) from [<c0229f60>] (usb_submit_urb+0xfc/0x2e8) [<c0229e64>] (usb_submit_urb+0x0/0x2e8) from [<c022b418>] (usb_start_wait_urb+0x48/0xcc) r8:c1357e54 r7:0000001e r6:00000000 r5:c1357e10 r4:c162d400 r3:c162d400 [<c022b3d0>] (usb_start_wait_urb+0x0/0xcc) from [<c022b6a8>] (usb_control_msg+0xd4/0xf8) r8:00000000 r7:00001718 r6:00000004 r5:c022aea8 r4:c1625b60 [<c022b5d4>] (usb_control_msg+0x0/0xf8) from [<bf32265c>] (RTUSB_VendorRequest+0x1ac/0x2bc [mt7601Usta]) [<bf3224b0>] (RTUSB_VendorRequest+0x0/0x2bc [mt7601Usta]) from [<bf3228b4>] (RTUSBReadMACRegister+0x40/0x58 [mt7601Usta]) [<bf322874>] (RTUSBReadMACRegister+0x0/0x58 [mt7601Usta]) from [<bf2f0b7c>] (NICUpdateFifoStaCounters+0x38/0x3d8 [mt7601Usta]) r4:c2e96000 [<bf2f0b44>] (NICUpdateFifoStaCounters+0x0/0x3d8 [mt7601Usta]) from [<bf2e94a8>] (MlmePeriodicExec+0xb4/0x3ec [mt7601Usta]) [<bf2e93f4>] (MlmePeriodicExec+0x0/0x3ec [mt7601Usta]) from [<bf3053c8>] (RtmpTimerQThread+0x148/0x168 [mt7601Usta]) r6:000ccee9 r5:c2ea8b0c r4:c2e96000 [<bf305280>] (RtmpTimerQThread+0x0/0x168 [mt7601Usta]) from [<c005a504>] (kthread+0x90/0x98) [<c005a474>] (kthread+0x0/0x98) from [<c00430a8>] (do_exit+0x0/0x700) r7:00000013 r6:c00430a8 
r5:c005a474 r4:c1267c48 ① 可以用 命令查看 驱动被加载到kernel的哪段地址空间,如下: # cat /proc/modules NV_Driver 75164 0 - Live 0xbf37d000 mt7601Usta 687475 1 - Live 0xbf2bc000 hi3518_adec 16147 1 - Live 0xbf2b5000 (P) hi3518_aenc 53068 1 - Live 0xbf2a4000 (P) hi3518_ao 40719 1 - Live 0xbf296000 (P) hi3518_ai 46478 1 - Live 0xbf285000 (P) hi3518_sio 12541 3 hi3518_ao,hi3518_ai, Live 0xbf27d000 (P) hidmac 13593 2 hi3518_ao,hi3518_ai, Live 0xbf275000 acodec 9611 0 - Live 0xbf26f000 (P) ssp_ad9020 4993 0 - Live 0xbf26a000 hi_rtc 4363 0 - Live 0xbf265000 pwm 1489 0 - Live 0xbf261000 hi_i2c 3792 0 - Live 0xbf25d000 hi3518_ive 32474 1 - Live 0xbf251000 (P) hi3518_vda 172473 1 - Live 0xbf220000 (P) hi3518_region 51571 1 - Live 0xbf20e000 (P) hi3518_rc 152468 1 - Live 0xbf1e1000 (P) hi3518_jpege 48313 1 - Live 0xbf1d0000 (P) hi3518_h264e 148654 1 - Live 0xbf1a5000 (P) hi3518_chnl 39965 1 - Live 0xbf196000 (P) hi3518_group 178766 1 - Live 0xbf165000 (P) hi3518_venc 87386 3 hi3518_jpege,hi3518_h264e, Live 0xbf14a000 (P) hi3518_vou 258761 1 - Live 0xbf0fa000 (P) hi3518_vpss 208550 1 - Live 0xbf0bf000 (P) hi3518_isp 21496 3 ssp_ad9020,hi_i2c, Live 0xbf0b5000 (P) hi3518_viu 178785 1 - Live 0xbf07e000 (P) hi3518_dsu 129358 1 - Live 0xbf059000 (P) hi3518_tde 117632 1 hi3518_dsu, Live 0xbf033000 (P) hiuser 890 2 hi3518_jpege,hi3518_h264e, Live 0xbf02f000 hi3518_sys 40932 2 hi3518_viu, Live 0xbf01f000 (P) hi3518_base 43832 21 hi3518_adec,hi3518_aenc,hi3518_ao,hi3518_ai,hi3518_sio,acodec,hi3518_ive,hi3518_vda,hi3518_region,hi3518_rc,hi3518_jpege,hi3518_h264e,hi3518_chnl,hi3518_group,hi3518_venc,hi3518_vou,hi3518_vpss,hi3518_isp,hi3518_viu,hi3518_dsu,hi3518_sys, Live 0xbf00e000 (P) mmz 19713 4 hi3518_aenc,hi3518_h264e,hi3518_tde,hi3518_base, Live 0xbf005000 wdt 4229 0 - Live 0xbf000000 # ② 结合 hi3518_h264e.ko.dis.txt 反汇编文件分析. 
[<c039c9c8>] (panic+0x0/0x190) from [<bf1b6794>] (H264E_GetBitStream+0xa94/0xb18 [hi3518_h264e]) hi3518_h264e 148654 1 - Live 0xbf1a5000 (P) 00010d00 <H264E_GetBitStream>: 计算:bf1b6794 = bf1a5000(模块加载基址) + 00010d00(函数在ko中的偏移) + 0xa94(函数内偏移),其中 00010d00 + 0xa94 = 11794 再来看看 hi3518_h264e.ko.dis.txt 文件 11794 位置附近代码 1178c: e58d4000 str r4, [sp] 11790: ebfffffe bl 0 <panic> 11794: 00000000 andeq r0, r0, r0 11798: 000026ec andeq r2, r0, ip, ror #13 分析: 11790 处的 bl 指令跳转到 panic (panic为内核系统代码, 上面的栈回溯信息正是由内核panic函数打印出来的); 11794 是该 bl 指令的下一条地址, 即调用 panic 时保存在 LR 中的返回地址, 恰好与栈回溯中 from [<bf1b6794>] 对应的位置吻合.

    6.总结

    定位内核/驱动崩溃时,结合上述技术就可以达到事半功倍的效果。
    转载请注明原文地址: https://ju.6miu.com/read-677773.html

    最新回复(0)