dump_stack()用法

最近在看Linux USB Composite Framework的内容,经常看到函数指针跳转来跳转去。比如说会看到某个代码执行结构体中的.bind函数指针,但又不知道到底是谁在调用它。
此时,就可以用dump_stack()这个函数来追踪函数调用关系。当然,还是要自己尝试学习理解这个框架结构,不然纯粹地知道函数调用关系意义不大。另外,dump_stack()可用来定位Kernel Panic和Oops的问题,配合objdump和addr2line可以定位到是哪个文件的哪一行代码出现问题。


例子

比如说在以下3个结构体中都包含.bind的成员,都同属于Linux USB Composite Framework的范畴,看多了会不知道谁调用谁。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/*
 * Describes a USB composite driver: identification data plus a set of
 * callbacks, each of which receives the struct usb_composite_dev it acts on.
 * Note that .bind here takes a struct usb_composite_dev *, unlike the .bind
 * members of struct usb_gadget_driver and struct usb_function.
 */
struct usb_composite_driver {
const char *name;
const struct usb_device_descriptor *dev;
struct usb_gadget_strings **strings;
enum usb_device_speed max_speed;
/* single-bit flag */
unsigned needs_serial:1;

/* bind/unbind callbacks; both return an int status */
int (*bind)(struct usb_composite_dev *cdev);
int (*unbind)(struct usb_composite_dev *);

void (*disconnect)(struct usb_composite_dev *);

/* global suspend hooks */
void (*suspend)(struct usb_composite_dev *);
void (*resume)(struct usb_composite_dev *);
/*
 * Embedded gadget driver. NOTE(review): to_cdriver() presumably maps a
 * pointer to this member back to the enclosing structure - confirm.
 */
struct usb_gadget_driver gadget_driver;
};

/*
 * Describes a USB gadget driver: a function name string, a maximum speed,
 * and callbacks that each receive the struct usb_gadget they act on.
 * Its .bind additionally receives the struct usb_gadget_driver itself.
 */
struct usb_gadget_driver {
char *function;
enum usb_device_speed max_speed;
/* returns an int status; second argument is the driver being bound */
int (*bind)(struct usb_gadget *gadget,
struct usb_gadget_driver *driver);
void (*unbind)(struct usb_gadget *);
/* receives a read-only struct usb_ctrlrequest; returns an int status */
int (*setup)(struct usb_gadget *,
const struct usb_ctrlrequest *);
void (*disconnect)(struct usb_gadget *);
void (*suspend)(struct usb_gadget *);
void (*resume)(struct usb_gadget *);

/* FIXME support safe rmmod */
struct device_driver driver;
};

/*
 * Describes one USB function: a name, string tables, three descriptor
 * tables, a pointer to a struct usb_configuration, and callbacks.
 * Its .bind/.unbind receive both the configuration and the function.
 */
struct usb_function {
const char *name;
struct usb_gadget_strings **strings;
/* NOTE(review): presumably full-/high-/super-speed descriptor sets - confirm */
struct usb_descriptor_header **fs_descriptors;
struct usb_descriptor_header **hs_descriptors;
struct usb_descriptor_header **ss_descriptors;

struct usb_configuration *config;

/* REVISIT: bind() functions can be marked __init, which
* makes trouble for section mismatch analysis. See if
* we can't restructure things to avoid mismatching.
* Related: unbind() may kfree() but bind() won't...
*/

/* configuration management: bind/unbind */
int (*bind)(struct usb_configuration *,
struct usb_function *);
void (*unbind)(struct usb_configuration *,
struct usb_function *);
void (*free_func)(struct usb_function *f);
struct module *mod;

/* runtime state management */
int (*set_alt)(struct usb_function *,
unsigned interface, unsigned alt);
int (*get_alt)(struct usb_function *,
unsigned interface);
void (*disable)(struct usb_function *);
int (*setup)(struct usb_function *,
const struct usb_ctrlrequest *);
void (*suspend)(struct usb_function *);
void (*resume)(struct usb_function *);

/* USB 3.0 additions */
int (*get_status)(struct usb_function *);
int (*func_suspend)(struct usb_function *,
u8 suspend_opt);
/* private: */
/* internals */
struct list_head list;
/* bitmap named "endpoints", declared with a size argument of 32 */
DECLARE_BITMAP(endpoints, 32);
const struct usb_function_instance *fi;
};

/*
 * Prototype only (body not shown here). Takes a composite device, a
 * configuration, and a bind callback that receives a configuration;
 * returns an int.
 */
int usb_add_config(struct usb_composite_dev *cdev,
struct usb_configuration *config,
int (*bind)(struct usb_configuration *))


测试用例

1
2
3
4
5
6
7
8
9
10
11
12
static int composite_bind(struct usb_gadget *gadget,
struct usb_gadget_driver *gdriver)
{
struct usb_composite_dev *cdev;
struct usb_composite_driver *composite = to_cdriver(gdriver);
int status = -ENOMEM;
printk("[xxx-dump] in %s, line = %d, dump start\n", __func__, __LINE__);
dump_stack();
printk("[xxx-dump] in %s, line = %d, dump end\n", __func__, __LINE__);
cdev = kzalloc(sizeof *cdev, GFP_KERNEL);
……
}

比如说我在composite_bind()中调用dump_stack(),代码如上。得到的函数调用栈如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
[   35.571746] 1111111111111111111
[ 35.574990] [xxx-dump] in composite_bind, line = 1675, dump start
[ 35.581099] CPU: 0 PID: 115 Comm: NHPnpReceiverTD Tainted: P O 3.14.19 #1
[ 35.588779] [<c0015f68>] (unwind_backtrace) from [<c0012288>] (show_stack+0x10/0x14)
[ 35.596540] [<c0012288>] (show_stack) from [<c047d8a8>] (dump_stack+0x80/0x90)
[ 35.603793] [<c047d8a8>] (dump_stack) from [<bf045f60>] (composite_bind+0x28/0x1b0 [libcomposite])
[ 35.612764] [<bf045f60>] (composite_bind [libcomposite]) from [<bf0236a4>] (udc_bind_to_driver+0x50/0x110 [udc_core])
[ 35.623365] [<bf0236a4>] (udc_bind_to_driver [udc_core]) from [<bf023f24>] (usb_gadget_probe_driver+0x70/0xcc [udc_core])
[ 35.634309] [<bf023f24>] (usb_gadget_probe_driver [udc_core]) from [<c0008908>] (do_one_initcall+0xd4/0x17c)
[ 35.644133] [<c0008908>] (do_one_initcall) from [<c00765ec>] (load_module+0x1bec/0x2140)
[ 35.652225] [<c00765ec>] (load_module) from [<c0076be8>] (SyS_init_module+0xa8/0x110)
[ 35.660062] [<c0076be8>] (SyS_init_module) from [<c000eca0>] (ret_fast_syscall+0x0/0x30)
[ 35.668148] [xxx-dump] in composite_bind, line = 1677, dump end

很明显,我们可以通过这个调用栈的信息知道composite_bind()的调用关系(从下往上)如下:

  • ret_fast_syscall -> SyS_init_module -> load_module -> do_one_initcall ->
  • usb_gadget_probe_driver -> udc_bind_to_driver -> composite_bind ->
  • dump_stack -> show_stack -> unwind_backtrace

第1行是module_init()相关的调用,也就是说调用了module_init()加载某个驱动。更一般的,我们知道是注册一个USB Composite Driver的过程;
第2行可以直观的看到调用composite_bind()是哪个函数;
第3行是dump_stack()的调用关系;

这里只分析如下四条打印语句,从下往上逐条分析。

1
2
3
4
[   35.596540] [<c0012288>] (show_stack) from [<c047d8a8>] (dump_stack+0x80/0x90)
[ 35.603793] [<c047d8a8>] (dump_stack) from [<bf045f60>] (composite_bind+0x28/0x1b0 [libcomposite])
[ 35.612764] [<bf045f60>] (composite_bind [libcomposite]) from [<bf0236a4>] (udc_bind_to_driver+0x50/0x110 [udc_core])
[ 35.623365] [<bf0236a4>] (udc_bind_to_driver [udc_core]) from [<bf023f24>] (usb_gadget_probe_driver+0x70/0xcc [udc_core])


反汇编文件

在函数名之后的[libcomposite]和[udc_core],标记该函数分别位于libcomposite.ko和udc-core.ko这两个ko文件中(模块名udc_core对应的文件是udc-core.ko)。如果没有标记,说明这是built-in的,只需反汇编vmlinux即可。
接下来我们就将这两个ko文件和vmlinux文件objdump出来。在kernel-xxx/目录下执行以下语句进行反汇编。

1
2
3
../prebuilts/gcc/linux-x86/arm/arm-linux-gnueabihf/bin/arm-linux-gnueabihf-objdump -d -l -f -g -S drivers/usb/gadget/libcomposite.ko > composite.log
../prebuilts/gcc/linux-x86/arm/arm-linux-gnueabihf/bin/arm-linux-gnueabihf-objdump -d -l -f -g -S drivers/usb/gadget/udc-core.ko > udc-core.log
../prebuilts/gcc/linux-x86/arm/arm-linux-gnueabihf/bin/arm-linux-gnueabihf-objdump -d -l -f -g -S vmlinux > vmlinux.log


dump_stack()格式分析

(1)、

1
[<bf0236a4>] (udc_bind_to_driver [udc_core]) from [<bf023f24>] (usb_gadget_probe_driver+0x70/0xcc [udc_core])

从上面的信息,我们至少可以获得以下信息:

  • 这个是编译进udc-core的ko文件的,因此我们要查看udc-core.log文件;
  • 0xbf023f24是usb_gadget_probe_driver()函数入口偏移0x70处的地址,它是调用udc_bind_to_driver()之后的返回地址,即bl调用指令的下一条指令(bl指令本身位于其前4个字节处)。因此我们可以得出usb_gadget_probe_driver()函数的入口地址为0xbf023f24-0x70=0xbf023eb4
  • usb_gadget_probe_driver()函数的总大小为0xcc字节,即范围为:0xbf023eb4~0xbf023f80

查看udc-core.log文件,搜索”usb_gadget_probe_driver”的关键字,我们可以得到usb_gadget_probe_driver()函数的位置:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
00000eb4 <usb_gadget_probe_driver>:
usb_gadget_probe_driver():
/home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/udc-core.c:625
return ret;
}
EXPORT_SYMBOL_GPL(udc_attach_driver);

int usb_gadget_probe_driver(struct usb_gadget_driver *driver)
{
eb4: e92d4038 push {r3, r4, r5, lr}
/home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/udc-core.c:629
struct usb_udc *udc = NULL;
int ret;

if (!driver || !driver->bind || !driver->setup)
eb8: e2505000 subs r5, r0, #0
ebc: 0a000028 beq f64 <usb_gadget_probe_driver+0xb0>
/home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/udc-core.c:629 (discriminator 1)
ec0: e5953008 ldr r3, [r5, #8]
ec4: e3530000 cmp r3, #0
ec8: 0a000025 beq f64 <usb_gadget_probe_driver+0xb0>
ecc: e5953010 ldr r3, [r5, #16]
ed0: e3530000 cmp r3, #0
ed4: 0a000022 beq f64 <usb_gadget_probe_driver+0xb0>
/home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/udc-core.c:632
return -EINVAL;

mutex_lock(&udc_lock);
ed8: e3004000 movw r4, #0
edc: e3404000 movt r4, #0
ee0: e1a00004 mov r0, r4
ee4: ebfffffe bl 0 <mutex_lock>
/home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/udc-core.c:633
list_for_each_entry(udc, &udc_list, list) {
ee8: e1a02004 mov r2, r4
eec: e5b23018 ldr r3, [r2, #24]!
ef0: e1530002 cmp r3, r2
ef4: e24300f8 sub r0, r3, #248 ; 0xf8
ef8: 1a000004 bne f10 <usb_gadget_probe_driver+0x5c>
efc: ea00000e b f3c <usb_gadget_probe_driver+0x88>
f00: e59030f8 ldr r3, [r0, #248] ; 0xf8
f04: e1530002 cmp r3, r2
f08: e24300f8 sub r0, r3, #248 ; 0xf8
f0c: 0a00000a beq f3c <usb_gadget_probe_driver+0x88>
/home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/udc-core.c:635
/* For now we take the first one */
if (!udc->driver)
f10: e51330f8 ldr r3, [r3, #-248] ; 0xf8
f14: e3530000 cmp r3, #0
f18: 1afffff8 bne f00 <usb_gadget_probe_driver+0x4c>
/home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/udc-core.c:643

pr_debug("couldn't find an available UDC\n");
mutex_unlock(&udc_lock);
return -ENODEV;
found:
ret = udc_bind_to_driver(udc, driver);
f1c: e1a01005 mov r1, r5
f20: ebfffdcb bl 654 <udc_bind_to_driver>
f24: e1a04000 mov r4, r0
/home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/udc-core.c:644
mutex_unlock(&udc_lock);
f28: e3000000 movw r0, #0
f2c: e3400000 movt r0, #0
f30: ebfffffe bl 0 <mutex_unlock>
/home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/udc-core.c:645
return ret;

对于这样格式的内容,表示看不懂。但从“00000eb4 <usb_gadget_probe_driver>:”这句话可以推测usb_gadget_probe_driver()函数在udc-core.ko中的入口地址为:0x00000eb4。
0x00000eb4与前面推算出的0xbf023eb4相差0xbf023000。我猜想,0xbf023000正是udc-core.ko被加载到内核后的基地址,也就是模块内地址与内核运行时地址之间的偏移量。
所以要找到调用udc_bind_to_driver()的地方,其相对于udc-core.ko的偏移量应为0xbf023f24-0xbf023000=0xf24。
为了找到调用的该函数的所在行,我们使用addr2line工具将地址转换为行号:

1
2
3
4
5
#../prebuilts/gcc/linux-x86/arm/arm-linux-gnueabihf/bin/arm-linux-gnueabihf-addr2line -e drivers/usb/gadget/udc-core.ko 0xf24
/home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/udc-core.c:643

#../prebuilts/gcc/linux-x86/arm/arm-linux-gnueabihf/bin/arm-linux-gnueabihf-addr2line -e drivers/usb/gadget/udc-core.ko 0xeb4
/home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/udc-core.c:625

就可以知道usb_gadget_probe_driver()函数的入口在udc-core.c第625行,调用udc_bind_to_driver()的位置在udc-core.c第643行。查看代码跟解析出来的一致:
代码

(2)、
按照同样的方法再来解析下面的log:

1
[<bf045f60>] (composite_bind [libcomposite]) from [<bf0236a4>] (udc_bind_to_driver+0x50/0x110 [udc_core])

  • 这个是编译进udc-core的ko文件的,因此我们要查看udc-core.log文件;
  • 0xbf0236a4(在udc-core.ko中的地址为0x000006a4)是udc_bind_to_driver()函数入口偏移0x50处的地址,composite_bind()函数正是在这里被调用的。因此我们可以得出udc_bind_to_driver()函数的入口地址为0xbf0236a4-0x50=0xbf023654(在udc-core.ko中的地址为0x00000654)
  • udc_bind_to_driver()函数的总大小为0x110字节,即范围为:0xbf023654~0xbf023764
    1
    2
    3
    4
    5
    #../prebuilts/gcc/linux-x86/arm/arm-linux-gnueabihf/bin/arm-linux-gnueabihf-addr2line -e drivers/usb/gadget/udc-core.ko 0x6a4
    /home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/udc-core.c:577

    #../prebuilts/gcc/linux-x86/arm/arm-linux-gnueabihf/bin/arm-linux-gnueabihf-addr2line -e drivers/usb/gadget/udc-core.ko 0x654
    /home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/udc-core.c:566

就可以知道udc_bind_to_driver()函数入口在udc-core.c第566行,调用composite_bind()函数在udc-core.c第577行。
代码调用

(3)、

1
[   35.603793] [<c047d8a8>] (dump_stack) from [<bf045f60>] (composite_bind+0x28/0x1b0 [libcomposite])

  • 这个是编译进libcomposite的ko文件的,因此我们要查看前面反汇编libcomposite.ko得到的composite.log文件;
  • 查看composite.log文件并搜索”composite_bind”得到其地址为0x00002f38,因此我们可以知道libcomposite.ko相对整个kernel偏移0xbf045f38-0x00002f38=0xbf043000。

    1
    2
    3
    4
    5
    00002f38 <composite_bind>:
    composite_bind():
    /home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/composite.c:1671
    device_remove_file(&cdev->gadget->dev, &dev_attr_suspended);
    }
  • 以libcomposite.ko的地址为基准,在composite_bind()函数起始地址0x00002f38的基础上偏移0x28,即0x00002f60处会去调用dump_stack()函数。

    1
    2
    3
    4
    5
    #../prebuilts/gcc/linux-x86/arm/arm-linux-gnueabihf/bin/arm-linux-gnueabihf-addr2line -e drivers/usb/gadget/libcomposite.ko 0x2f60
    /home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/composite.c:1677

    #../prebuilts/gcc/linux-x86/arm/arm-linux-gnueabihf/bin/arm-linux-gnueabihf-addr2line -e drivers/usb/gadget/libcomposite.ko 0x2f38
    /home/victor/work/xxx_project/kernel-xxx/drivers/usb/gadget/composite.c:1671

代码调用

很明显,这就是我添加dump_stack()的位置,追本溯源终于找到自己熟悉的地方了。

(4)、

1
[   35.596540] [<c0012288>] (show_stack) from [<c047d8a8>] (dump_stack+0x80/0x90)

这些是built-in的,直接在vmlinux中就可以找到它们的地址。

1
2
3
4
5
#../prebuilts/gcc/linux-x86/arm/arm-linux-gnueabihf/bin/arm-linux-gnueabihf-addr2line -e vmlinux 0xc047d8a8
/home/victor/work/xxx_project/kernel-xxx/lib/dump_stack.c:52

#../prebuilts/gcc/linux-x86/arm/arm-linux-gnueabihf/bin/arm-linux-gnueabihf-addr2line -e vmlinux 0xc047d828
/home/victor/work/xxx_project/kernel-xxx/lib/dump_stack.c:27


参考资料

http://einon.net/DocBook/kernel-api/API-vsnprintf.html
http://blog.csdn.net/liyongming1982/article/details/16349769
http://blog.csdn.net/liyongming1982/article/details/16349875
http://blog.csdn.net/jasonchen_gbd/article/details/45585133

Title:dump_stack()用法

Author:Victor Huang

Time:2019-03-17 / 16:03

Link:http://wowothink.com/3c2873c0/

License: Attribution-NonCommercial-NoDerivatives 4.0 International (CC BY-NC-ND 4.0)