Technical Blog Post
Abstract
== DEBUGGING CORE FILES [07] == STACK TRACE <-> SOURCE CODE [EXAMPLE 2]
Body
== DEBUGGING CORE FILES [07] == STACK TRACE <-> SOURCE CODE [EXAMPLE 2] C program : example2.c [see at the end of entry] Platform : LINUX X86 Compilation: cc -m64 example2.c -o example2 Execution : ./example2 example2.c 12 # example2 example2.c 12 start time: 1519019133sec 111769nsec line = (null) [loop 5246992] Memory fault(coredump) Let's load the core file and print the stack: # gdb example2 mycore ... (no debugging symbols found)...done. Loaded symbols for /lib64/ld-linux-x86-64.so.2 Failed to read a valid object file image from memory. Core was generated by `example2 example2.c 12'. Program terminated with signal 11, Segmentation fault. #0 0x00002aaaaac3bce0 in strlen () from /lib64/libc.so.6 (gdb) where #0 0x00002aaaaac3bce0 in strlen () from /lib64/libc.so.6 #1 0x00002aaaaac0db31 in vfprintf () from /lib64/libc.so.6 #2 0x00002aaaaac1339a in printf () from /lib64/libc.so.6 #3 0x0000000000400891 in main () So here we fail in 'strlen()', and that's in a system library. We don't have the source code for the library, but most likely some wrong arguments were passed to 'strlen()'. Since we don't have the source for 'printf()' either, what we can do is locate the 'printf()' call in our source for 'example2.c'. We use the return address '0x0000000000400891' to locate the portion of assembly that performs the call. In 'gdb', if you just disassemble the address, it will by default disassemble the whole function: (gdb) disas 0x0000000000400891 ... 
0x000000000040086c <+260>: mov $0x200,%esi 0x0000000000400871 <+265>: callq 0x400640 <fgets@plt> 0x0000000000400876 <+270>: test %rax,%rax 0x0000000000400879 <+273>: je 0x40089d <main+309> 0x000000000040087b <+275>: mov -0x18(%rbp),%rdx 0x000000000040087f <+279>: mov -0x4(%rbp),%esi 0x0000000000400882 <+282>: mov $0x400a61,%edi 0x0000000000400887 <+287>: mov $0x0,%eax 0x000000000040088c <+292>: callq 0x400650 <printf@plt> 0x0000000000400891 <+297>: addl $0x1,-0x4(%rbp) We have multiple 'printf()' calls in our source file, but in the assembly above we clearly see the call to 'fgets()' a few instructions before the 'printf()' call. Also, if we look at the registers used to pass arguments, we see that we are using 3 of them (%edi, %esi and %rdx), meaning we pass 3 arguments to 'printf()'. In addition, the first argument ($rdi) is set to the numeric value '0x400a61'. Since it is the first argument, we should find the format string there: (gdb) x/s 0x400a61 0x400a61: "line = %s [loop %d]\n" So indeed this is the following call that causes the SEGV: 46 while (num < max) { 47 48 if (fgets(bf, BUFSIZE, fp) == 0) 49 break; 50 51 printf("line = %s [loop %d]\n", num, bf); The reason is that arguments 'bf' and 'num' have been passed in the wrong order. According to the format it should be 'bf' first, followed by 'num'. This also explains the output seen before the crash: on the first iteration 'num' is 0, so '%s' receives a null pointer and prints '(null)'; on the second iteration 'num' is 1, and 'strlen()' faults when it tries to read a string from that invalid address. 
== Source code for example2.c == #include <stdio.h> #include <stdlib.h> #include <fcntl.h> #include <strings.h> #include <string.h> #include <unistd.h> #include <errno.h> #define BUFSIZE 512 int main(int ac, char **av) { FILE *fp; char *bf; char *fname; int max; int num; struct timeval t; if (ac < 3) { printf("Usage: %s <filename> <maxloops>\n", av[0]); exit(1); } fname = av[1]; max = atoi(av[2]); if ((bf = (char *) malloc(BUFSIZE)) == 0) { printf("[error] malloc(bf), [errno = %d]\n", errno); exit(1); } gettimeofday(&t, 0); printf("start time: %ldsec %ldnsec\n", t.tv_sec, t.tv_usec); if ((fp = fopen(fname, "r")) == 0) { printf("[error] fopen(%s), [errno = %d]\n", fname, errno); exit(1); } num = 0; while (num < max) { if (fgets(bf, BUFSIZE, fp) == 0) break; printf("line = %s [loop %d]\n", num, bf); num++; } fclose(fp); gettimeofday(&t, 0); printf("end time : %ldsec %ldnsec\n", t.tv_sec, t.tv_usec); exit(0); }
[{"Business Unit":{"code":"BU058","label":"IBM Infrastructure w\/TPS"},"Product":{"code":"SSEPGG","label":"Db2 for Linux, UNIX and Windows"},"Component":"","Platform":[{"code":"PF025","label":"Platform Independent"}],"Version":"","Edition":"","Line of Business":{"code":"LOB10","label":"Data and AI"}}]
UID
ibm11140220