Let's say that I fork a child process within my program. At some point, I pause the child process with `kill(child, SIGSTOP)` and want to inspect the stack's contents. Is there a way to programmatically obtain a stack trace of a child process from its parent?
I know that `ptrace` is the standard way of tracing a child process and examining its memory/registers. I also know that `backtrace` provides this functionality for the calling thread. Is there a function or library that merges these functionalities? Or would I need to manually walk the stack with `ptrace`?
To answer my own question - this is doable. You need `libunwind` and `ptrace`. `libunwind` provides a wrapper for `ptrace` that allows you to unwind remote targets. Here's sample code, running an NPB benchmark (cg, Class A):
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include <wait.h>
#include <sys/ptrace.h>
#include <libunwind.h>
#include <libunwind-x86_64.h>
#include <libunwind-ptrace.h>
#include <signal.h>
/*
 * Report an error message on stderr, followed by a newline.
 *
 * The argument is stringified, so a string-literal argument is printed
 * together with its quotes. The macro only reports; it does not terminate
 * the program, so callers must handle the failure themselves.
 *
 * Wrapped in do { } while (0) so that `panic(...);` is a single statement and
 * is safe in an unbraced if/else. The text is passed as a "%s" argument, not
 * as the format string, so a '%' in the message is printed literally.
 */
#define panic(X) do { fprintf(stderr, "%s\n", #X); } while (0)
/* libunwind address space used for remote unwinding; created in main() with
 * the _UPT_accessors callbacks and passed to unw_init_remote(). */
static unw_addr_space_t as;
/* Per-target unwinding state; created by _UPT_create() and released by
 * _UPT_destroy() inside do_backtrace(). */
static struct UPT_info *ui;
/*
 * Print a stack trace of another process to stdout.
 *
 * Attaches to `child` with ptrace, waits until it has actually stopped,
 * walks its stack through libunwind's ptrace accessors (_UPT_*), prints one
 * line per frame, and finally detaches from the process again.
 *
 * child: PID of the process to inspect.
 *
 * Uses the file-level address space `as`, which must already have been
 * created, and the file-level `ui`, which is set for the duration of the call
 * and reset to NULL afterwards. Failures are reported through panic() and
 * abandon the trace; everything acquired so far is released before returning.
 */
void do_backtrace(pid_t child) {
    unw_cursor_t c;
    int status;
    int rc;
    int more;

    ui = _UPT_create(child);
    if (!ui) {
        panic("_UPT_create failed");
        return;
    }

    if (ptrace(PTRACE_ATTACH, child, NULL, NULL) == -1) {
        panic("ptrace(PTRACE_ATTACH) failed");
        goto out_destroy;
    }

    /*
     * PTRACE_ATTACH only requests the stop. The tracee's registers and memory
     * may be read once waitpid() has reported it as stopped, so wait for that
     * instead of sleeping for an arbitrary interval.
     */
    if (waitpid(child, &status, 0) != child || !WIFSTOPPED(status)) {
        panic("child did not stop after PTRACE_ATTACH");
        goto out_detach;
    }

    rc = unw_init_remote(&c, as, ui);
    if (rc != 0) {
        /* libunwind returns the negated error code on failure. */
        switch (rc) {
        case -UNW_EINVAL:
            panic("unw_init_remote: UNW_EINVAL");
            break;
        case -UNW_EUNSPEC:
            panic("unw_init_remote: UNW_EUNSPEC");
            break;
        case -UNW_EBADREG:
            panic("unw_init_remote: UNW_EBADREG");
            break;
        default:
            panic("unw_init_remote: UNKNOWN");
            break;
        }
        goto out_detach;
    }

    do {
        unw_word_t pc = 0;
        /* Stays 0 when the procedure name lookup fails and leaves it unset. */
        unw_word_t offset = 0;
        char fname[64];

        more = 0;
        if (unw_get_reg(&c, UNW_REG_IP, &pc) < 0) {
            panic("unw_get_reg(UNW_REG_IP) failed");
            break;
        }

        /* Best effort: an unknown symbol is printed as an empty name. */
        fname[0] = '\0';
        (void) unw_get_proc_name(&c, fname, sizeof(fname), &offset);

        printf("\n%p : (%s+0x%lx) [%p]\n", (void *) pc,
               fname,
               (unsigned long) offset,
               (void *) pc);

        more = unw_step(&c);
    } while (more > 0);

    if (more < 0) {
        panic("unw_step failed");
    }

out_detach:
    if (ptrace(PTRACE_DETACH, child, NULL, NULL) == -1) {
        panic("ptrace(PTRACE_DETACH) failed");
    }
out_destroy:
    _UPT_destroy(ui);
    ui = NULL; /* do not leave a dangling pointer in the file-level state */
}
/*
 * Demo driver: start the benchmark binary as a child process, let it run for
 * one second, print its stack trace with do_backtrace(), then wait for it to
 * finish.
 *
 * The child is executed with this program's own argv and envp.
 * Returns 0 on success and EXIT_FAILURE if the unwinding address space cannot
 * be created or the fork fails. A child whose execve() fails exits with 127.
 */
int main(int argc __attribute__((unused)), char **argv, char **envp) {
    as = unw_create_addr_space(&_UPT_accessors, 0);
    if (!as) {
        panic("unw_create_addr_space failed");
        return EXIT_FAILURE;
    }

    pid_t child = fork();
    if (child == -1) {
        perror("fork");
        unw_destroy_addr_space(as);
        return EXIT_FAILURE;
    }

    if (child == 0) {
        execve("/home/#######/#######/my_utilities/child_bt/cg.A.x",
               argv, envp);
        /* Only reached when execve() failed. Use _exit() so the child does
         * not run the parent's exit handlers or flush its stdio buffers. */
        perror("execve");
        _exit(127);
    }

    /* Give the child time to get into its main computation. */
    struct timespec t = { .tv_sec = 1, .tv_nsec = 0 };
    nanosleep(&t, NULL);

    do_backtrace(child);

    int status;
    if (waitpid(child, &status, 0) == -1) {
        perror("waitpid");
    }

    unw_destroy_addr_space(as);
    return 0;
}
And the output:
#######-######-desktop:~/popcorn/my_utilities/child_bt$ ./child_bt
NAS Parallel Benchmarks (NPB3.3-SER) - CG Benchmark
Size: 14000
Iterations: 15
Initialization time = 0.422 seconds
iteration ||r|| zeta
1 0.26065081214763E-12 19.9997581277040
2 0.25753187736717E-14 17.1140495745506
3 0.25934878907518E-14 17.1296668946143
4 0.25626292684826E-14 17.1302113581193
5 0.25110613524700E-14 17.1302338856353
6 0.25581937582088E-14 17.1302349879482
7 0.25456477041068E-14 17.1302350498916
8 0.24494068328538E-14 17.1302350537510
0x400c85 : (conj_grad_+0x135) [0x400c85]
0x401ec8 : (MAIN__+0x739) [0x401ec8]
0x402b39 : (main+0x1d) [0x402b39]
0x7f8ee80c2ec5 : (__libc_start_main+0xf5) [0x7f8ee80c2ec5]
0x400a89 : (_start+0x29) [0x400a89]
9 0.24885235903729E-14 17.1302350540101
10 0.24771507610856E-14 17.1302350540284
11 0.24928441017003E-14 17.1302350540298
12 0.24443706061229E-14 17.1302350540299
13 0.24709361922612E-14 17.1302350540299
14 0.24381630450112E-14 17.1302350540299
15 0.24296673223448E-14 17.1302350540299
Benchmark completed
VERIFICATION SUCCESSFUL
Zeta is 0.1713023505403E+02
Error is 0.5122640033228E-13
CG Benchmark Completed.
Class = A
Size = 14000
Iterations = 15
Time in seconds = 1.01
Mop/s total = 1483.11
Operation type = floating point
Verification = SUCCESSFUL
Version = 3.3.1
Compile date = 16 Jul 2015
Compile options:
F77 = gfortran
FLINK = $(F77)
F_LIB = (none)
F_INC = (none)
FFLAGS = -O
FLINKFLAGS = -O
RAND = randi8
Please send all errors/feedbacks to:
NPB Development Team
npb@nas.nasa.gov
I based the `do_backtrace` function on the `test-ptrace.c` file within the `tests` folder of the `libunwind` distribution, as well as code from this blog.