1.序言
驱动往往是芯片厂商提供的,而且是不开源的。
一旦崩溃很难查找原因,当然办法是有的,比如内核为此也提供栈回溯技术(低版本的好像没有实现)来定位分析驱动问题。再不济也可以反汇编ko文件。
2.栈回溯
栈回溯技术由来已久,已经不是一个什么新鲜的词汇了,但它在开发、调试当中非常重要、不管是Linux kernel、驱动、还是应用开发。
如果掌握了栈回溯技术,对程序员职业生涯有着非同寻常的意义。只要涉及到栈,难免跟体系架构分不开。
无论是ARM架构、X86架构还是MIPS,其实现栈回溯的方法肯定是不同的。Linux2.6早期的版本还未实现栈回溯,后期的版本都已经实现。
Linux kernel : 内核arch相关目录下有对栈回溯实现代码。关键函数dump_stack。
Linux app:应用程序则需要调用第三方库来使用栈回溯的功能,如glibc。
⑴Linux kernel 实现
NOTE:该部分代码引用自 Linux 3.8.13
/*
 * dump_stack - entry point for dumping a stack backtrace.
 *
 * Forwards to dump_backtrace() with neither a register snapshot nor a task
 * supplied (both arguments are NULL).
 */
void dump_stack(void)
{
	dump_backtrace(NULL, NULL);
}
/*
 * dump_backtrace - thin wrapper around unwind_backtrace().
 * @regs: register snapshot, passed through unchanged
 * @tsk:  task, passed through unchanged
 */
static inline void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
	unwind_backtrace(regs, tsk);
}
/*
 * unwind_backtrace - walk a call chain and print one line per frame.
 * @regs: saved register set to start from, or NULL
 * @tsk:  task to unwind; NULL selects the current task
 *
 * The starting frame comes from one of three places:
 *   - @regs, when it is non-NULL;
 *   - this function's own frame address, stack pointer and return address,
 *     when @tsk is the current task;
 *   - the thread_saved_*() values of @tsk otherwise.
 *
 * Frames are then unwound with unwind_frame() until it returns a negative
 * value; every successfully unwound frame is reported through
 * dump_backtrace_entry().
 */
void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
	register unsigned long current_sp asm ("sp");
	struct stackframe frame;

	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);

	if (tsk == NULL)
		tsk = current;

	if (regs != NULL) {
		frame.fp = regs->ARM_fp;
		frame.sp = regs->ARM_sp;
		frame.lr = regs->ARM_lr;
		/* Use lr instead when the saved pc is not a kernel text address. */
		if (kernel_text_address(regs->ARM_pc))
			frame.pc = regs->ARM_pc;
		else
			frame.pc = regs->ARM_lr;
	} else if (tsk != current) {
		/* Another task: start from the values saved for it. */
		frame.fp = thread_saved_fp(tsk);
		frame.sp = thread_saved_sp(tsk);
		frame.lr = 0;
		frame.pc = thread_saved_pc(tsk);
	} else {
		/* Current task without regs: start from this very function. */
		frame.fp = (unsigned long)__builtin_frame_address(0);
		frame.sp = current_sp;
		frame.lr = (unsigned long)__builtin_return_address(0);
		frame.pc = (unsigned long)unwind_backtrace;
	}

	for (;;) {
		/* pc before unwinding is the "where" of the reported entry. */
		unsigned long entered_at = frame.pc;

		if (unwind_frame(&frame) < 0)
			break;

		dump_backtrace_entry(entered_at, frame.pc, frame.sp - 4);
	}
}
/*
 * dump_backtrace_entry - print a single backtrace line.
 * @where: address of the function that was entered
 * @from:  address it was entered from
 * @frame: stack address belonging to this entry
 *
 * With CONFIG_KALLSYMS both addresses are printed numerically and, through
 * %pS, symbolically. Without it only the numeric addresses are printed.
 * When @where lies in exception text, the memory from @frame + 4 up to
 * @frame + 4 + sizeof(struct pt_regs) is dumped as "Exception stack".
 *
 * The numeric conversions must be "%08lx": with a bare "lx" the text is
 * printed literally and the unsigned long arguments are consumed by %pS.
 */
void dump_backtrace_entry(unsigned long where, unsigned long from,
			  unsigned long frame)
{
#ifdef CONFIG_KALLSYMS
	printk("[<%08lx>] (%pS) from [<%08lx>] (%pS)\n",
	       where, (void *)where, from, (void *)from);
#else
	printk("Function entered at [<%08lx>] from [<%08lx>]\n", where, from);
#endif

	if (in_exception_text(where))
		dump_mem("", "Exception stack", frame + 4,
			 frame + 4 + sizeof(struct pt_regs));
}
/*
 * dump_mem - hex-dump the memory range [bottom, top) as 32-bit words.
 * @lvl:    prefix printed at the start of every output line
 * @str:    caption printed in the header line
 * @bottom: first address to dump
 * @top:    end address (exclusive)
 *
 * Output is organised in rows of eight words starting at 32-byte aligned
 * addresses; each row is labelled with the low 16 bits of its address.
 * Slots of the first row that lie below @bottom are left blank, and words
 * that __get_user() fails to read are shown as "????????".
 *
 * The address limit is switched to KERNEL_DS around the __get_user() reads
 * and restored to the caller's value before returning.
 *
 * Every numeric field needs an explicit conversion ("%08lx" / "%04lx"):
 * a bare "lx" is printed literally and leaves the arguments unused.
 */
static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
		     unsigned long top)
{
	unsigned long first;
	mm_segment_t fs;
	int i;

	fs = get_fs();
	set_fs(KERNEL_DS);

	printk("%s%s(0x%08lx to 0x%08lx)\n", lvl, str, bottom, top);

	for (first = bottom & ~31; first < top; first += 32) {
		unsigned long p;
		/* Named "line" so it does not shadow the @str parameter. */
		char line[sizeof(" 12345678") * 8 + 1];

		/* Pre-fill with blanks so skipped slots keep the columns aligned. */
		memset(line, ' ', sizeof(line));
		line[sizeof(line) - 1] = '\0';

		for (p = first, i = 0; i < 8 && p < top; i++, p += 4) {
			if (p >= bottom && p < top) {
				unsigned long val;

				/* Each slot is 9 characters: a blank plus 8 hex digits. */
				if (__get_user(val, (unsigned long *)p) == 0)
					sprintf(line + i * 9, " %08lx", val);
				else
					sprintf(line + i * 9, " ????????");
			}
		}
		printk("%s%04lx:%s\n", lvl, first & 0xffff, line);
	}

	set_fs(fs);
}
⑵glibc 实现
NOTE : 该部分代码引自glibc-2.24
<1> 测试用例
/* qsort() comparison callback for arrays of uint32_t that also prints the
   call stack it was invoked from.

   On every call it captures up to 20 return addresses with backtrace(),
   converts them to strings with backtrace_symbols() and prints them; the
   string array is released with free().

   Returns a negative value, zero or a positive value when *P1 is less
   than, equal to or greater than *P2.  The result comes from an explicit
   three-way comparison: returning the unsigned difference converted to
   int reports the wrong sign whenever the operands differ by 2^31 or
   more.  */
static int
compare (const void *p1, const void *p2)
{
  void *ba[20];
  int n = backtrace (ba, sizeof (ba) / sizeof (ba[0]));

  if (n != 0)
    {
      char **names = backtrace_symbols (ba, n);
      if (names != NULL)
        {
          int i;
          printf ("called from %s\n", names[0]);
          for (i = 1; i < n; ++i)
            printf (" %s\n", names[i]);
          free (names);
        }
    }

  const uint32_t lhs = *(const uint32_t *) p1;
  const uint32_t rhs = *(const uint32_t *) p2;

  return (lhs > rhs) - (lhs < rhs);
}
/* Test driver: fills a 20-element uint32_t array with random() values,
   sorts it with qsort() using compare() as the callback, then prints every
   element in hexadecimal, one per line.  ARGC and ARGV are not used.  */
int
main (int argc, char *argv[])
{
  enum { NELEMS = 20 };
  uint32_t values[NELEMS];
  size_t idx;

  for (idx = 0; idx < NELEMS; ++idx)
    values[idx] = random ();

  qsort (values, NELEMS, sizeof (values[0]), compare);

  for (idx = 0; idx < NELEMS; ++idx)
    printf ("%" PRIx32 "\n", values[idx]);

  return 0;
}
<2> backtrace 函数实现
/* Store up to SIZE return addresses of the current call chain in ARRAY and
   return how many were stored.

   The walk starts at FIRST_FRAME_POINTER and follows the `next' link of
   each frame record via ADVANCE_STACK_FRAME.  It ends when SIZE entries
   have been written, or when the frame pointer is INNER_THAN the current
   stack frame or is not INNER_THAN __libc_stack_end.  */
int
__backtrace (void **array, int size)
{
  void *top_frame;
  void *top_stack;
  struct layout *node;
  int depth;

  top_frame = FIRST_FRAME_POINTER;
  top_stack = CURRENT_STACK_FRAME;

  node = (struct layout *) top_frame;
  for (depth = 0; depth < size; ++depth)
    {
      /* Leave the loop as soon as the frame pointer falls outside the
         range checked by the two INNER_THAN tests.  */
      if ((void *) node INNER_THAN top_stack
          || !((void *) node INNER_THAN __libc_stack_end))
        break;

      array[depth] = node->return_address;
      node = ADVANCE_STACK_FRAME (node->next);
    }

  return depth;
}
weak_alias (__backtrace, backtrace)
libc_hidden_def (__backtrace)
<3> backtrace_symbols 函数实现
/* Turn the SIZE addresses in ARRAY into printable strings.

   Returns one malloc'ed block holding SIZE `char *' entries followed
   directly by the string data they point to, or NULL when malloc fails.
   The strings live inside that same block.

   Each address is looked up with _dl_addr.  Depending on the result the
   text is one of:
     "file(symbol+0xoff) [addr]" / "file(symbol-0xoff) [addr]"
     "file() [addr]"   (no symbol name and a zero base address)
     "[addr]"          (lookup failed or the file name is empty)  */
char **
__backtrace_symbols (void *const *array, int size)
{
  Dl_info info[size];
  int status[size];
  size_t bytes = 0;
  char **result;
  char *cursor;
  int i;

  /* Pass 1: resolve every address and add up the space its text needs.  */
  for (i = 0; i < size; ++i)
    {
      struct link_map *map;

      status[i] = _dl_addr (array[i], &info[i], &map, NULL);
      if (!status[i] || !info[i].dli_fname || info[i].dli_fname[0] == '\0')
        {
          bytes += 5 + WORD_WIDTH;
          continue;
        }

      bytes += (strlen (info[i].dli_fname ?: "")
                + strlen (info[i].dli_sname ?: "")
                + 3 + WORD_WIDTH + 3 + WORD_WIDTH + 5);

      /* From here on dli_fbase holds the link map's l_addr.  */
      info[i].dli_fbase = (void *) map->l_addr;
    }

  /* One block: the pointer table first, the string data right behind it.  */
  result = (char **) malloc (size * sizeof (char *) + bytes);
  if (result == NULL)
    return result;

  /* Pass 2: format each entry into the string area.  */
  cursor = (char *) (result + size);
  for (i = 0; i < size; ++i)
    {
      char sign;
      ptrdiff_t offset;

      result[i] = cursor;

      if (!(status[i]
            && info[i].dli_fname != NULL && info[i].dli_fname[0] != '\0'))
        {
          cursor += 1 + sprintf (cursor, "[%p]", array[i]);
          continue;
        }

      /* Without a symbol name the address is described relative to
         dli_fbase.  */
      if (info[i].dli_sname == NULL)
        info[i].dli_saddr = info[i].dli_fbase;

      if (info[i].dli_sname == NULL && info[i].dli_saddr == 0)
        {
          cursor += 1 + sprintf (cursor, "%s(%s) [%p]",
                                 info[i].dli_fname ?: "",
                                 info[i].dli_sname ?: "",
                                 array[i]);
          continue;
        }

      if (array[i] >= (void *) info[i].dli_saddr)
        {
          sign = '+';
          offset = array[i] - info[i].dli_saddr;
        }
      else
        {
          sign = '-';
          offset = info[i].dli_saddr - array[i];
        }

      /* The extra 1 steps over the terminating NUL written by sprintf.  */
      cursor += 1 + sprintf (cursor, "%s(%s%c%#tx) [%p]",
                             info[i].dli_fname ?: "",
                             info[i].dli_sname ?: "",
                             sign, offset,
                             array[i]);
    }

  assert (cursor <= (char *) result + size * sizeof (char *) + bytes);

  return result;
}
weak_alias (__backtrace_symbols, backtrace_symbols)
3.反汇编
掌握了栈回溯技术,我们心里可能还是不那么踏实,毕竟驱动程序大部分是不开源的,你无法去跟踪分析源码,仅有栈信息还不够。这里我们可以利用反汇编技术来定位问题。
objdump -D test.ko > test.ko.dis
4. /proc文件系统
4.1 查看模块加载的起始地址
cat /proc/modules
NV_Driver 75164 0 - Live 0xbf37d000
mt7601Usta 687475 1 - Live 0xbf2bc000
hi3518_adec 16147 1 - Live 0xbf2b5000 (P)
hi3518_aenc 53068 1 - Live 0xbf2a4000 (P)
hi3518_ao 40719 1 - Live 0xbf296000 (P)
hi3518_ai 46478 1 - Live 0xbf285000 (P)
hi3518_sio 12541 3 hi3518_ao,hi3518_ai, Live 0xbf27d000 (P)
hidmac 13593 2 hi3518_ao,hi3518_ai, Live 0xbf275000
acodec 9611 0 - Live 0xbf26f000 (P)
ssp_ad9020 4993 0 - Live 0xbf26a000
hi_rtc 4363 0 - Live 0xbf265000
pwm 1489 0 - Live 0xbf261000
hi_i2c 3792 0 - Live 0xbf25d000
hi3518_ive 32474 1 - Live 0xbf251000 (P)
hi3518_vda 172473 1 - Live 0xbf220000 (P)
hi3518_region 51571 1 - Live 0xbf20e000 (P)
hi3518_rc 152468 1 - Live 0xbf1e1000 (P)
hi3518_jpege 48313 1 - Live 0xbf1d0000 (P)
hi3518_h264e 148654 1 - Live 0xbf1a5000 (P)
hi3518_chnl 39965 1 - Live 0xbf196000 (P)
hi3518_group 178766 1 - Live 0xbf165000 (P)
hi3518_venc 87386 3 hi3518_jpege,hi3518_h264e, Live 0xbf14a000 (P)
hi3518_vou 258761 1 - Live 0xbf0fa000 (P)
hi3518_vpss 208550 1 - Live 0xbf0bf000 (P)
hi3518_isp 21496 3 ssp_ad9020,hi_i2c, Live 0xbf0b5000 (P)
hi3518_viu 178785 1 - Live 0xbf07e000 (P)
hi3518_dsu 129358 1 - Live 0xbf059000 (P)
hi3518_tde 117632 1 hi3518_dsu, Live 0xbf033000 (P)
hiuser 890 2 hi3518_jpege,hi3518_h264e, Live 0xbf02f000
hi3518_sys 40932 2 hi3518_viu, Live 0xbf01f000 (P)
hi3518_base 43832 21 hi3518_adec,hi3518_aenc,hi3518_ao,hi3518_ai,hi3518_sio,acodec,hi3518_ive,hi3518_vda,hi3518_region,hi3518_rc,hi3518_jpege,hi3518_h264e,hi3518_chnl,hi3518_group,hi3518_venc,hi3518_vou,hi3518_vpss,hi3518_isp,hi3518_viu,hi3518_dsu,hi3518_sys, Live 0xbf00e000 (P)
mmz 19713 4 hi3518_aenc,hi3518_h264e,hi3518_tde,hi3518_base, Live 0xbf005000
wdt 4229 0 - Live 0xbf000000
4.2 查看内核symbol表
cat /proc/kallsyms
5.例子
<1>现象
1
# ./sample_venc 0
please press twice ENTER to exit this sample
Wed Nov
16 10:
58:
31 WAUST
2016
Get ISP Interrupt Failed with ec
0x1!
Get ISP Interrupt Failed with ec
0x1!
[SAMPLE_COMM_VENC_GetVencStreamProc]-
920: get venc stream time
out, exit thread
Get ISP Interrupt Failed with ec
0x1!
Wed Nov
16 10:
59:
04 WAUST
2016
Get ISP Interrupt Failed with ec
0x1!
Get ISP Interrupt Failed with ec
0x1!
[SAMPLE_COMM_VENC_GetVencStreamProc]-
920: get venc stream time
out, exit thread
Get ISP Interrupt Failed with ec
0x1!
Wed Nov
16 10:
59:
06 WAUST
2016
Get ISP Interrupt Failed with ec
0x1!
Get ISP Interrupt Failed with ec
0x1!
================vedu
0 debug info=============
===debug info from
0000-
0154===
0000 :
00000000 00000000 00000000 00000000
0010 :
00000000 00000000 00000000 00000000
0020 :
00000000 00000000 00000000 00000000
0030 :
00000000 00000000 00000000 00000000
0040 :
00000000 00000000 00000000 00000000
0050 :
00000000 00000000 00000000 00000000
0060 :
00000000 00000000 00000000 00000000
0070 :
00000000 00000000 00000000 00000000
0080 :
00000000 00000000 00000000 00000000
0090 :
00000000 00000000 00000000 00000000
00a0 :
00000000 00000000 00000000 00000000
00b0 :
00000000 00000000 00000000 00000000
00c0 :
00000000 00000000 00000000 00000000
00d0 :
00000000 00000000 00000000 00000000
00e0 :
00000000 00000000 00000000 00000000
00f0 :
00000000 00000000 00000000 00000000
0100 :
00000000 00000000 00000000 00000000
0110 :
00000000 00000000 00000000 00000000
0120 :
00000000 00000000 00000000 00000000
0130 :
00000000 00000000 00000000 00000000
0140 :
00000000 00000000 00000000 00000000
0150 :
00000000 00000000 00000000 00000000
===debug info from
0a50-
0a9c===
0a50 :
00000000 00000000 00000000 00000000
0a60 :
00000000 00000000 00000000 00000000
0a70 :
00000000 00000000 00000000 00000000
0a80 :
00000000 00000000 00000000 00000000
0a90 :
00000000 00000000 00000000 00000000
VEDU_0 :watchdog here.
Bitatream channel error! s32ChnID :
2 ,pstNaluHead->s32ChnID : -
825307441
Kernel panic - not syncing:
ASSERT failed at:
>File name: /home/pub/platform_h3/mpp/code/mkp/vedu/h264e/h264e_ext
.c
>Function : H264E_GetBitStream
>Line No. :
1880
>Condition: pstNaluHead->s32ChnID == s32ChnID
Backtrace:
[<c0031ff4>] (dump_backtrace+
0x0/
0x10c) from [<c039c9c4>] (dump_stack+
0x18/
0x1c)
r6:c16290d0
r5:c04aa810
r4:c04aa810
r3:
0000000a
[<c039c9ac>] (dump_stack+
0x0/
0x1c) from [<c039ca2c>] (panic+
0x64/
0x190)
[<c039c9c8>] (panic+
0x0/
0x190) from [<bf1b6794>] (H264E_GetBitStream+
0xa94/
0xb18 [hi3518_h264e])
r3:
00000758 r2:bf1c33f4
r1:bf1c4138
r0:bf1c40e0
r7:
00002840
[<bf1b5d00>] (H264E_GetBitStream+
0x0/
0xb18 [hi3518_h264e]) from [<bf151c3c>] (VencPacktoVencBuf+
0x3a4/
0x6ac [hi3518_venc])
[<bf151898>] (VencPacktoVencBuf+
0x0/
0x6ac [hi3518_venc]) from [<bf151fa0>] (VENC_FrameOverNotify+
0x5c/
0x214 [hi3518_venc])
[<bf151f44>] (VENC_FrameOverNotify+
0x0/
0x214 [hi3518_venc]) from [<bf1b7f1c>] (H264E_IntProcess+
0x1ec/
0x4fc [hi3518_h264e])
r8:bf015e84
r7:
00000001 r6:c1357b00
r5:c2918000
r4:
00000002
[<bf1b7d30>] (H264E_IntProcess+
0x0/
0x4fc [hi3518_h264e]) from [<bf1672bc>] (GROUP_IntHandlerCallBack+
0x174/
0x270 [hi3518_group])
r8:
41a64e76
r7:bf19d790
r6:bf1c6b8c
r5:
00000002 r4:bf1723b0
[<bf167148>] (GROUP_IntHandlerCallBack+
0x0/
0x270 [hi3518_group]) from [<bf199488>] (ChnlVeduIntHandler+
0x2a0/
0x6ac [hi3518_chnl])
r7:bf19d790
r6:c1357b50
r5:c2800350
r4:bf19d7e8
[<bf1991e8>] (ChnlVeduIntHandler+
0x0/
0x6ac [hi3518_chnl]) from [<bf199954>] (ChnlVeduISR+
0xc0/
0x154 [hi3518_chnl])
[<bf199894>] (ChnlVeduISR+
0x0/
0x154 [hi3518_chnl]) from [<c0071910>] (handle_irq_event_percpu+
0x54/
0x1b4)
r6:
00000000 r5:
00000018 r4:c13ac7c0
[<c00718bc>] (handle_irq_event_percpu+
0x0/
0x1b4) from [<c0071aa0>] (handle_irq_event+
0x30/
0x40)
[<c0071a70>] (handle_irq_event+
0x0/
0x40) from [<c0073d44>] (handle_level_irq+
0x88/
0xe4)
r4:c1c05900
r3:c0492788
[<c0073cbc>] (handle_level_irq+
0x0/
0xe4) from [<c00718a0>] (generic_handle_irq+
0x30/
0x38)
r4:
00000018 r3:c0073cbc
[<c0071870>] (generic_handle_irq+
0x0/
0x38) from [<c0027038>] (asm_do_IRQ+
0x38/
0x8c)
r4:c0499934
r3:
00000020
[<c0027000>] (asm_do_IRQ+
0x0/
0x8c) from [<c002df34>] (__irq_svc+
0x34/
0xa0)
Exception stack(
0xc1357c50 to
0xc1357c98)
7c40: c1da3d20
00000010 80000280 80000d80
7c60:
00000000 ffdfd480
80000c80 ffdfd480 c1357d64 c1da76dc ffffffff c1357cb4
7c80: c1357cb8 c1357c98 c023e89c c023e170
60000013 ffffffff
r6:
00000001 r5:fe140000
r4:ffffffff
r3:
60000013
[<c023e15c>] (ehci_qtd_alloc+
0x0/
0x60) from [<c023e89c>] (qh_urb_transaction+
0x2d4/
0x458)
r5:ffdfd480
r4:
00000000
[<c023e5c8>] (qh_urb_transaction+
0x0/
0x458) from [<c023f940>] (ehci_urb_enqueue+
0xa0/
0xf94)
[<c023f8a0>] (ehci_urb_enqueue+
0x0/
0xf94) from [<c02291c4>] (usb_hcd_submit_urb+
0xc0/
0x6bc)
[<c0229104>] (usb_hcd_submit_urb+
0x0/
0x6bc) from [<c0229f60>] (usb_submit_urb+
0xfc/
0x2e8)
[<c0229e64>] (usb_submit_urb+
0x0/
0x2e8) from [<c022b418>] (usb_start_wait_urb+
0x48/
0xcc)
r8:c1357e54
r7:
0000001e
r6:
00000000 r5:c1357e10
r4:c162d400
r3:c162d400
[<c022b3d0>] (usb_start_wait_urb+
0x0/
0xcc) from [<c022b6a8>] (usb_control_msg+
0xd4/
0xf8)
r8:
00000000 r7:
00001718 r6:
00000004 r5:c022aea8
r4:c1625b60
[<c022b5d4>] (usb_control_msg+
0x0/
0xf8) from [<bf32265c>] (RTUSB_VendorRequest+
0x1ac/
0x2bc [mt7601Usta])
[<bf3224b0>] (RTUSB_VendorRequest+
0x0/
0x2bc [mt7601Usta]) from [<bf3228b4>] (RTUSBReadMACRegister+
0x40/
0x58 [mt7601Usta])
[<bf322874>] (RTUSBReadMACRegister+
0x0/
0x58 [mt7601Usta]) from [<bf2f0b7c>] (NICUpdateFifoStaCounters+
0x38/
0x3d8 [mt7601Usta])
r4:c2e96000
[<bf2f0b44>] (NICUpdateFifoStaCounters+
0x0/
0x3d8 [mt7601Usta]) from [<bf2e94a8>] (MlmePeriodicExec+
0xb4/
0x3ec [mt7601Usta])
[<bf2e93f4>] (MlmePeriodicExec+
0x0/
0x3ec [mt7601Usta]) from [<bf3053c8>] (RtmpTimerQThread+
0x148/
0x168 [mt7601Usta])
r6:
000ccee9
r5:c2ea8b0c
r4:c2e96000
[<bf305280>] (RtmpTimerQThread+
0x0/
0x168 [mt7601Usta]) from [<c005a504>] (kthread+
0x90/
0x98)
[<c005a474>] (kthread+
0x0/
0x98) from [<c00430a8>] (do_exit+
0x0/
0x700)
r7:
00000013 r6:c00430a8
r5:c005a474
r4:c1267c48
① 可以用 cat /proc/modules 命令查看驱动被加载到 kernel 的哪段地址空间,如下:
# cat /proc/modules
NV_Driver
75164 0 - Live
0xbf37d000
mt7601Usta
687475 1 - Live
0xbf2bc000
hi3518_adec
16147 1 - Live
0xbf2b5000 (P)
hi3518_aenc
53068 1 - Live
0xbf2a4000 (P)
hi3518_ao
40719 1 - Live
0xbf296000 (P)
hi3518_ai
46478 1 - Live
0xbf285000 (P)
hi3518_sio
12541 3 hi3518_ao,hi3518_ai, Live
0xbf27d000 (P)
hidmac
13593 2 hi3518_ao,hi3518_ai, Live
0xbf275000
acodec
9611 0 - Live
0xbf26f000 (P)
ssp_ad9020
4993 0 - Live
0xbf26a000
hi_rtc
4363 0 - Live
0xbf265000
pwm
1489 0 - Live
0xbf261000
hi_i2c
3792 0 - Live
0xbf25d000
hi3518_ive
32474 1 - Live
0xbf251000 (P)
hi3518_vda
172473 1 - Live
0xbf220000 (P)
hi3518_region
51571 1 - Live
0xbf20e000 (P)
hi3518_rc
152468 1 - Live
0xbf1e1000 (P)
hi3518_jpege
48313 1 - Live
0xbf1d0000 (P)
hi3518_h264e
148654 1 - Live
0xbf1a5000 (P)
hi3518_chnl
39965 1 - Live
0xbf196000 (P)
hi3518_group
178766 1 - Live
0xbf165000 (P)
hi3518_venc
87386 3 hi3518_jpege,hi3518_h264e, Live
0xbf14a000 (P)
hi3518_vou
258761 1 - Live
0xbf0fa000 (P)
hi3518_vpss
208550 1 - Live
0xbf0bf000 (P)
hi3518_isp
21496 3 ssp_ad9020,hi_i2c, Live
0xbf0b5000 (P)
hi3518_viu
178785 1 - Live
0xbf07e000 (P)
hi3518_dsu
129358 1 - Live
0xbf059000 (P)
hi3518_tde
117632 1 hi3518_dsu, Live
0xbf033000 (P)
hiuser
890 2 hi3518_jpege,hi3518_h264e, Live
0xbf02f000
hi3518_sys
40932 2 hi3518_viu, Live
0xbf01f000 (P)
hi3518_base
43832 21 hi3518_adec,hi3518_aenc,hi3518_ao,hi3518_ai,hi3518_sio,acodec,hi3518_ive,hi3518_vda,hi3518_region,hi3518_rc,hi3518_jpege,hi3518_h264e,hi3518_chnl,hi3518_group,hi3518_venc,hi3518_vou,hi3518_vpss,hi3518_isp,hi3518_viu,hi3518_dsu,hi3518_sys, Live
0xbf00e000 (P)
mmz
19713 4 hi3518_aenc,hi3518_h264e,hi3518_tde,hi3518_base, Live
0xbf005000
wdt
4229 0 - Live
0xbf000000
#
② 结合 hi3518_h264e.ko.dis.txt 反汇编文件分析。
[<c039c9c8>] (panic+0x0/0x190) from [<bf1b6794>] (H264E_GetBitStream+0xa94/0xb18 [hi3518_h264e])
hi3518_h264e 148654 1 - Live 0xbf1a5000 (P)
00010d00 <H264E_GetBitStream>:
计算:0xbf1b6794 = 0xbf1a5000(模块加载基址)+ 0x00010d00(H264E_GetBitStream 在模块内的偏移)+ 0xa94(函数内偏移)
即模块内偏移:0x00010d00 + 0xa94 = 0x11794
再来看看 hi3518_h264e.ko.dis.txt 文件中 0x11794 位置附近的代码:
1178c:	e58d4000 	str	r4, [sp]
11790:	ebfffffe 	bl	0 <panic>
11794:	00000000 	andeq	r0, r0, r0
11798:	000026ec 	andeq	r2, r0, ip, ror #13
分析:
11790 跳转到 panic <panic为内核系统代码, 正是内核panic函数打印出的栈回溯信息>
11794 恰好是 bl panic 指令的下一条地址,即调用 panic 时保存的返回地址,与栈回溯中 from [<bf1b6794>] 换算出的模块内偏移一致。
6.总结
定位内核/驱动崩溃可以结合上述技术,就可以达到事半功倍的效果了。