Case Study Library interpositioning
Understanding linking can help you avoid nasty errors and make you a better programmer
main.c
int sum(int *a, int n);
int array[2] = {1, 2};
/*
 * Program entry point.
 * Passes the global array and its element count (2) to sum() and uses
 * the value sum() returns as the process exit status.
 */
int main(int argc, char** argv) {
    return sum(array, 2);
}
sum.c
/*
 * Return the sum of the first n elements of a.
 *
 * a: pointer to the first element to add.
 * n: number of elements to add; when n <= 0 the loop never runs and
 *    the result is 0.
 */
int sum(int *a, int n) {
    int i, s = 0;
    for (i = 0; i < n; i++) {
        s += a[i];
    }
    return s;
}
Programs are translated and linked using a compiler driver:
linux> gcc -Og -o prog main.c sum.c
linux> ./prog
Reason 1: Modularity
Step 1: Symbol resolution
main
sum
array
sum
.o
files to their final absolute memory locations in the executable
.o
file)
a.out
file)
.so
file)
.o
)
a.out
)
.so
)
.text
section
.rodata
section
.data
section
.bss
section
.symtab
section
.rel.text
section
.text
section
.rel.data
section
.data
section
.debug
section
static
attribute
Which names will be in the symbol table of symbols.c
?
/* Global step value that foo() adds to its argument. */
int incr = 1;

/* Return a plus the current value of the global incr. */
static int foo(int a) {
    return a + incr;
}
/* Print the value of foo(5) on its own line and exit with status 0. */
int main(int argc, char* argv[]) {
    int result = foo(5);

    printf("%d\n", result);
    return 0;
}
Can find this with the command: readelf -s symbols.o
.bss
, or .data
Example: each local x
gets a unique name, for example x.1721
/* File-scope x: private to this file; the only x that h() can see. */
static int x = 15;

/*
 * Return the current value of f's own static x (initially 17), then
 * increment it. This x is a separate object from the file-scope x and
 * from the x inside g().
 */
int f() {
    static int x = 17;
    return x++;
}

/*
 * Add 14 to g's own static x (initially 19) and return the new value.
 */
int g() {
    static int x = 19;
    return x += 14;
}

/*
 * Add 27 to the file-scope x and return the new value; no local x
 * shadows the file-scope one here.
 */
int h() {
    return x += 27;
}
extern
specifier
Examples
// Global definition with an initializer.
int foo = 5; // strong
// Function definition.
// NOTE(review): p1 and p2 omit the return type (implicit int), which C99
// and later reject -- presumably abbreviated for the slide.
p1() { // strong
}
// Global declared without an initializer.
int foo; // weak
// Function definition.
p2() { // strong
}
gcc -fno-common
Avoid if you can
static
if you can
extern
if you reference an external global variable
The linker does not do type checking
main.c
// x is declared here as a long int with no initializer. variable.c defines
// a symbol with the same name but type double; the linker does not check
// that the two types agree.
long int x; // weak symbol
// Prints x interpreted as a long int (format %ld) and exits with status 0.
int main(int argc, char* argv[]) {
printf("%ld\n", x);
return 0;
}
variable.c
double x = 3.14; // global strong symbol
extern
Example
c1.c
#include "global.h"
/* Return the value of g (declared extern in global.h) plus one. */
int f() { return g+1; }
global.h
extern int g;
int f();
extern
Example (Continued)
c2.c
#include <stdio.h>
#include "global.h"
int g;
/* Call f() and print the value it returns; exit with status 0. */
int main (int argc, char* argv[]) {
    printf("Calling f yields %d\n", f());
    return 0;
}
main.c
/* sum() is defined in another file; declare it before use so the call
   below is not an implicit function declaration. */
int sum(int *a, int n);

/* Global array whose address is passed to sum(). */
int array[2] = {1, 2};

/*
 * Program entry point.
 * Calls sum() on the two elements of array and returns the result as
 * the process exit status.
 */
int main(int argc, char** argv)
{
    int val = sum(array, 2);
    return val;
}
main.o
0000000000000000 <main>:
0: 48 83 ec 08 sub $0x8,%rsp
4: be 02 00 00 00 mov $0x2,%esi
9: bf 00 00 00 00 mov $0x0,%edi # %edi = &array
a: R_X86_64_32 array # Relocation entry
e: e8 00 00 00 00 callq 13 <main+0x13> # sum()
f: R_X86_64_PC32 sum-0x4 # Relocation entry
13: 48 83 c4 08 add $0x8,%rsp
17: c3 retq
.a
archive files)
.o
in the archive
ar rs libstuff.a a.o b.o ... x.o
gcc -static main.o -L. -lstuff
libc.a
(the C standard library)
libm.a
(the C math library)
ar
command can be used to view contents:
ar -t /usr/lib/libc.a
.o
files and .a
files in the command line order
.o
or .a
file is encountered, try to resolve each unresolved reference in the list against the symbols defined in the .o
or .a
file
linux> gcc -static -o prog -L. -lstuff main.o
main.o: In function `main':
main.c:(.text+0x19): undefined reference to `thing'
collect2: error: ld returned 1 exit status
.so
files
libc.so
) usually dynamically linked
dlopen
interface
.interp
section
ld-linux.so
).dynamic
section
ldd
to find out
Example
linux> ldd prog
linux-vdso.so.1 => (0x00007ffcf2998000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f99ad927000)
/lib64/ld-linux-x86-64.so.2 (0x00007f99adcef000)
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
int x[2] = {1, 2};
int y[2] = {3, 4};
int z[2];
/*
 * Load ./libvector.so at run time, look up its addvec function, call it
 * on the global arrays x and y (2 elements each) with z as the output,
 * print z, and unload the library.
 *
 * Returns 0 on success. On any dynamic-loading failure the dlerror()
 * message is written to stderr and the process exits with status 1.
 */
int main(int argc, char** argv)
{
    void *handle;
    void (*addvec)(int *, int *, int *, int);
    char *error;

    /* Dynamically load the shared library that contains addvec() */
    handle = dlopen("./libvector.so", RTLD_LAZY);
    if (!handle) {
        fprintf(stderr, "%s\n", dlerror());
        exit(1);
    }

    /* Get a pointer to the addvec() function we just loaded */
    addvec = dlsym(handle, "addvec");
    if ((error = dlerror()) != NULL) {
        fprintf(stderr, "%s\n", error);
        exit(1);
    }

    /* Now we can call addvec() just like any other function */
    addvec(x, y, z, 2);
    printf("z = [%d %d]\n", z[0], z[1]);

    /* Unload the shared library; dlclose reports failure with any
       nonzero return value, not only a negative one */
    if (dlclose(handle) != 0) {
        fprintf(stderr, "%s\n", dlerror());
        exit(1);
    }
    return 0;
}
Linking is a technique that allows programs to be constructed from multiple object files
Understanding linking can help you avoid nasty errors and make you a better programmer
malloc
tracing
malloc
/free
for careful error checking
#include <stdio.h>
#include <malloc.h>
#include <stdlib.h>
/*
 * For every command-line argument, convert it to an int with atoi,
 * allocate that many bytes, and free the block immediately.
 * The allocator calls are kept as plain malloc(...)/free(...) calls so
 * that a malloc.h which redefines them as macros can redirect them.
 */
int main(int argc, char *argv[]) {
    for (int i = 1; i < argc; i++) {
        void *p = malloc(atoi(argv[i]));
        free(p);
    }
    return 0;
}
Goal: trace the addresses and sizes of the allocated and freed blocks without breaking the program and without modifying the source code
Three solutions: interpose on the library malloc
and free
functions at compile time, link time and load/run time
mymalloc.c
#ifdef COMPILETIME
#include <stdio.h>
#include <malloc.h>
/*
 * mymalloc - tracing wrapper around malloc.
 * Requests size bytes from malloc, prints the requested size and the
 * address that came back, and returns that address to the caller.
 */
void *mymalloc(size_t size)
{
    void *block = malloc(size);

    printf("malloc(%d)=%p\n", (int)size, block);
    return block;
}
/*
 * myfree - tracing wrapper around free.
 * Releases ptr through free first, then prints the address that was
 * passed in.
 */
void myfree(void *ptr)
{
    free(ptr);

    printf("free(%p)\n", ptr);
}
#endif
malloc.h
/* Rewrite every malloc(...) / free(...) call in a file that includes this
   header into a call to the corresponding wrapper. */
/* NOTE(review): size_t is used below without including a header that
   defines it; this relies on the including file having pulled one in first
   (int.c includes <stdio.h> before <malloc.h>). */
#define malloc(size) mymalloc(size)
#define free(ptr) myfree(ptr)
/* Prototypes of the wrappers the macros above expand to. */
void *mymalloc(size_t size);
void myfree(void *ptr);
Compiling
linux> gcc -Wall -DCOMPILETIME -c mymalloc.c
linux> gcc -Wall -I. -o intc int.c mymalloc.o
#ifdef LINKTIME
#include <stdio.h>
void *__real_malloc(size_t size);
void __real_free(void *ptr);
/*
 * __wrap_malloc - link-time malloc wrapper.
 * Obtains the block from __real_malloc, prints the requested size and
 * the address obtained, and returns that address.
 */
void *__wrap_malloc(size_t size)
{
    void *block = __real_malloc(size); /* Call libc malloc */

    printf("malloc(%d) = %p\n", (int)size, block);
    return block;
}
/*
 * __wrap_free - link-time free wrapper.
 * Releases ptr through __real_free, then prints the address that was
 * passed in.
 */
void __wrap_free(void *ptr)
{
    __real_free(ptr); /* Call libc free */

    printf("free(%p)\n", ptr);
}
#endif
Compiling
linux> gcc -Wall -DLINKTIME -c mymalloc.c
linux> gcc -Wall -c int.c
linux> gcc -Wall -Wl,--wrap,malloc -Wl,--wrap,free -o intl \
int.o mymalloc.o
The “-Wl” flag passes its argument to the linker, replacing each comma with a space
The “--wrap,malloc” arg
instructs the linker to resolve references in a special way:
malloc
should be resolved as __wrap_malloc
__real_malloc
should be resolved as malloc
#ifdef RUNTIME
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <dlfcn.h>
/*
 * malloc wrapper function (run-time interposition).
 *
 * Looks up the next definition of malloc in library search order with
 * dlsym(RTLD_NEXT, ...), calls it, prints the requested size and the
 * returned address, and returns that address.
 *
 * If the lookup fails, the dlerror() message is written to stderr and
 * the process exits with status 1.
 */
void *malloc(size_t size)
{
    /* Nonzero while this thread is inside the tracing printf below. */
    static _Thread_local int tracing = 0;
    void *(*mallocp)(size_t size);
    char *error;

    mallocp = dlsym(RTLD_NEXT, "malloc"); /* Get addr of libc malloc */
    if ((error = dlerror()) != NULL) {
        fputs(error, stderr);
        exit(1);
    }
    void *ptr = mallocp(size); /* Call libc malloc */

    /* printf can itself call malloc (for example when stdio allocates a
       stream buffer). Such a nested call re-enters this wrapper, so skip
       tracing for it; otherwise the wrapper and printf would call each
       other without bound. */
    if (!tracing) {
        tracing = 1;
        printf("malloc(%d) = %p\n", (int)size, ptr);
        tracing = 0;
    }
    return ptr;
}
/*
 * free wrapper function (run-time interposition).
 *
 * A NULL argument is ignored and not traced. Otherwise the next
 * definition of free is looked up with dlsym(RTLD_NEXT, ...), called on
 * ptr, and the address is printed. If the lookup fails, the dlerror()
 * message is written to stderr and the process exits with status 1.
 */
void free(void *ptr)
{
    void (*real_free)(void *);
    char *err;

    if (ptr == NULL) {
        return;
    }

    real_free = dlsym(RTLD_NEXT, "free"); /* Get address of libc free */
    err = dlerror();
    if (err != NULL) {
        fputs(err, stderr);
        exit(1);
    }

    real_free(ptr); /* Call libc free */
    printf("free(%p)\n", ptr);
}
#endif
Compiling
linux> gcc -Wall -DRUNTIME -shared -fpic -o mymalloc.so mymalloc.c -ldl
linux> gcc -Wall -o intr int.c
LD_PRELOAD
environment variable tells the dynamic linker to resolve unresolved references by looking in mymalloc.so
first
Type into (some) shells as:
env LD_PRELOAD=./mymalloc.so ./intr 10 100 1000
malloc
/free
get macro-expanded into calls to mymalloc
/myfree
malloc
/free
that use dynamic linking to load library malloc
/free
under different names
.o
, .a
, and .so
files