Sunday, October 26, 2025

Searching for non-exported symbols

To install a hook, we need to know the address of the target function. If debugging information is available or the function is present in the symbol table, that’s simple — but what if it’s declared `static`? If the function we need references string literals, we can search for those instead.

Let's assume we have the following function:


/*
 * Demo target for the search: a file-local (static) function that passes the
 * string literal "Enter the password:" to printf and returns 0.
 * The noinline attribute asks the compiler not to inline it into its callers.
 * The `pass` argument is not used by this stub.
 */
static int __attribute__ ((noinline)) check_password(char *pass)
{
        printf("Enter the password:");
        return 0;
}

After compilation, it looks like this:

    1dc6:       f3 0f 1e fa             endbr64
    1dca:       55                      push   %rbp
    1dcb:       48 89 e5                mov    %rsp,%rbp
    1dce:       48 83 ec 10             sub    $0x10,%rsp
    1dd2:       48 89 7d f8             mov    %rdi,-0x8(%rbp)
    1dd6:       48 8d 05 2b 12 00 00    lea    0x122b(%rip),%rax # 3008
    1ddd:       48 89 c7                mov    %rax,%rdi
    1de0:       b8 00 00 00 00          mov    $0x0,%eax
    1de5:       e8 06 f3 ff ff          call   10f0 <printf@plt>
    1dea:       b8 00 00 00 00          mov    $0x0,%eax
    1def:       c9                      leave
    1df0:       c3                      ret

First of all, we need to find the code section (`.text`) and the read-only data section (`.rodata`). There are many possible approaches, but I chose the one where we search for the function within our own process, using the `link_map` structure to locate the addresses. We’ll consider the code to be everything located between the addresses given by the `DT_INIT` and `DT_FINI` entries of the dynamic section (`init` and `fini` below).


        /* get base address from link_map */
        struct link_map *lm = _r_debug.r_map;
        uint8_t *map = NULL;
        for ( ; lm != NULL; lm = lm->l_prev) {
                printf("LM %s %p\n", lm->l_name, lm->l_ld);
                if (*lm->l_name == 0) {
                        map = (void*)lm->l_addr;
                        break;
                }
        }
        printf("base address %p\n", map);

        /*
         * Find rodata segment.
         *
         * Scans the program headers of the mapped image for PT_LOAD entries
         * whose flags are exactly PF_R and keeps the last one found: its
         * p_vaddr goes to data_addr and its p_filesz to data_size.
         */
        Elf64_Ehdr *ehdr = (Elf64_Ehdr*)map;
        Elf64_Phdr *phdr = (Elf64_Phdr*)(map + ehdr->e_phoff);
        Elf64_Addr data_addr = 0;
        /* start at 0 so the assert below is well defined when nothing matches */
        unsigned data_size = 0;
        /* we assume four-segment layout */
        for (int i = 0; i < ehdr->e_phnum; i++) {
                if (phdr[i].p_type == PT_LOAD && phdr[i].p_flags == PF_R) {
                        data_addr = phdr[i].p_vaddr;
                        data_size = phdr[i].p_filesz;
                }
        }
        assert(data_size != 0);
        /* data_size is unsigned, so print it with %u */
        printf("rodata @ %lx (%u)\n", data_addr, data_size);

        /* find text */
        Elf64_Dyn *dyn = NULL;
        uintptr_t init = 0, fini = 0;
        for (dyn = (Elf64_Dyn*)_DYNAMIC; dyn->d_tag != DT_NULL; ++dyn) {
                if (dyn->d_tag == DT_INIT)
                        init = dyn->d_un.d_val;
                if (dyn->d_tag == DT_FINI)
                        fini = dyn->d_un.d_val;
        }
        assert(init != 0 && fini != 0);
        printf("init %lx fini %lx\n", init, fini);

We disassemble the code and add each instruction to a linked list. The `fast` array is used to quickly look up the structure that describes an instruction by the instruction's offset (ZMist uses the same optimization).


        /*
         * One node per decoded instruction.  `next` chains the nodes in
         * decoding order; fast[] is indexed by the instruction's offset from
         * `map` and holds the node decoded at that offset (NULL where no
         * decoded instruction starts).
         */
        struct code {
                unsigned flags, imm, off, is_func, len, opcode, visited, modrm, disp;
                struct code *link, *next, *func;
        } *code = NULL, *tail = NULL, *c, *fast[fini];
        bzero(fast, sizeof(fast));
        /* NOTE(review): `len` is not declared in this excerpt — presumably defined earlier */
        for (Elf64_Addr i = init; i < fini; i += len) {
                hde64s hs;
                len = hde64_disasm(map + i, &hs);
                assert(len > 0);
                /* zeroed allocation: pointer and flag fields start out NULL/0 */
                c = calloc(1, sizeof *c);
                assert(c != NULL);
                // ...
                if (code == NULL) {
                        code = c;
                        tail = c;
                } else {
                        tail->next = c;
                        tail = c;
                }
                fast[i] = c;
        }

We link all the CALL/JMP/Jcc instructions to their targets, mark the functions (everything that is the target of a CALL), and add the functions to a separate linked list, threaded through the `func` pointer.


       /*
        * Link calls/jmps, mark functions.
        *
        * For every instruction flagged F_RELATIVE, compute the branch target
        * (offset of the next instruction plus the sign-extended immediate),
        * point `link` at the node decoded at that target, and flag the target
        * of a CALL (opcode 0xe8) as a function entry.
        */
        for (c = code; c; c = c->next) {
                Elf64_Addr jmp;

                if (!(c->flags & F_RELATIVE))
                        continue;
                if (c->flags & F_IMM32)
                        jmp = c->off + c->len + (int32_t)c->imm;
                else
                        /* int8_t: sign-extend regardless of plain char's signedness */
                        jmp = c->off + c->len + (int8_t)c->imm;
                /* instructions were decoded over [init, fini), so init itself is a valid target */
                if (jmp < init || jmp >= fini)
                        continue;
                /* let it just fail on PLTs */
                if (fast[jmp] == NULL)
                        printf("link error @ %x -> %lx\n", c->off, jmp);
                else
                        c->link = fast[jmp];
                if (c->opcode == 0xe8 && c->link)
                        c->link->is_func = 1;
        }

        /*
         * Make functions list: chain every node flagged is_func through its
         * `func` pointer, in decoding order.  `func` is the head of that list;
         * `tail` is reused here as its running last element.
         */
        struct code *func = NULL;
        for (c = code; c; c = c->next) {
                if (!c->is_func)
                        continue;
                /* off is unsigned, so print it with %u */
                printf("func @ %u\n", c->off);
                if (func == NULL)
                        func = c;
                else
                        tail->func = c;
                tail = c;
        }

Let’s find the address of the required string in the rodata segment:


        /* find string in rodata segment */
        char needle[32];
        strcpy(needle, "enter the password:");
        needle[0] = 'E';
        char *p = memmem(map + data_addr, data_size, needle, strlen(needle));
        assert(p != NULL);
        Elf64_Addr str = p - (char*)map;
        printf("Found needle @ %lx\n", str);

And finally, we’ll traverse the control-flow graph of each function, looking for the `LEA` instruction that loads the address of our string (in real code, variants with `MOV` or `PUSH` are also possible).


        /*
         * Traverse CFG.
         *
         * Walks the instructions reachable from `code`, calling cb(c, arg) on
         * every node that has not been visited yet, until a RET (0xc3), an
         * already visited node, or the end of the chain is reached.  Targets
         * of conditional branches are explored recursively; CALL targets are
         * not entered.  The `visited` marks are set but never cleared here.
         *
         * Returns the last non-zero value produced by cb (directly or inside
         * an explored branch), or 0 if there was none.
         */
        int traverse(struct code *code, int (*cb)(struct code *c, void *arg), void *arg) {
                struct code *c = code;
                int r = 0, s;
                /* until RET */
                while (c && c->opcode != 0xc3) {
                        /* prevent loops */
                        if (c->visited)
                                return r;
                        c->visited = 1;
                        s = cb(c, arg);
                        if (s)
                                r = s;
                        if (c->opcode == 0xe9 || c->opcode == 0xeb) {
                                /*
                                 * Unconditional JMP: the next instruction in
                                 * memory is not a successor.  Follow the
                                 * target when it was linked; when it was not
                                 * (link is NULL) the walk ends here instead
                                 * of falling through into unrelated code.
                                 */
                                c = c->link;
                                continue;
                        }
                        /* don't recurse into functions */
                        if (c->link && c->opcode != 0xe8) {
                                /* Jcc: explore the taken branch, then fall through */
                                s = traverse(c->link, cb, arg);
                                if (s)
                                        r = s;
                        }
                        c = c->next;
                }
                return r;
        }
        /*
         * traverse() callback: checks whether `c` is a RIP-relative LEA whose
         * computed address equals the Elf64_Addr that `arg` points to.
         * Prints the instruction offset and returns 1 on a match, 0 otherwise.
         */
        int cb(struct code *c, void *arg) {
                const Elf64_Addr wanted = *(Elf64_Addr*)arg;
                Elf64_Addr target;

                /* lea rip-relative, in real code one should check for other insn too */
                if (c->opcode != 0x8d)
                        return 0;
                if (!(c->flags & F_MODRM))
                        return 0;
                /* mod == 00 and r/m == 101 select RIP + disp32 addressing */
                if ((c->modrm & 0xc7) != 5)
                        return 0;

                /* address = offset of the next instruction + displacement */
                target = c->off + c->len + (int)c->disp;
                if (target != wanted)
                        return 0;

                printf("%x LEA\n", c->off);
                return 1;
        }

        /* for each function... */
        for (c = func; c; c = c->func) {
                printf("checking %x...\n", c->off);
                if (traverse(c, cb, &str) == 1)
                        break;
        }

And it just works, although real code would require a more thorough analysis of the file structure.

$ gcc unexport.c -o unexport # full code on github
$ ./unexport
LM  0x55ef516f3d80
base address 0x55ef516ef000
rodata @ 3000 (732)
init 1000 fini 2a00
link error @ 1039 -> 1020
link error @ 1049 -> 1020
.......
func @ 4272
func @ 4288
.......
Found needle @ 3008
checking 10b0...
checking 10c0...
.......
checking 1dc6...
1dd6 LEA
Enter the password:
OK, found it @ 1dc6

No comments:

Post a Comment