diff options
author | David Lamparter <equinox@opensourcerouting.org> | 2018-08-09 19:11:53 +0200 |
---|---|---|
committer | David Lamparter <equinox@diac24.net> | 2021-02-23 16:56:58 +0100 |
commit | 5609b3af497c44d980aad2bc7d6332dedd7974e7 (patch) | |
tree | 132a58bfe8ba5f582cddd9e0fe7625b5abb8da01 | |
parent | Merge pull request #7435 from sudhanshukumar22/bgp-peer-group-issue (diff) | |
download | frr-5609b3af497c44d980aad2bc7d6332dedd7974e7.tar.xz frr-5609b3af497c44d980aad2bc7d6332dedd7974e7.zip |
lib/clippy: add libelf wrapper
This adds _clippy.ELFFile, which provides a fast wrapper around libelf.
The API is similar to / a subset of pyelfutils, which unfortunately is
painfully slow (to the tune of minutes instead of seconds.)
The idea is that xrefs can be read out of ELF files by reading out the
"xref_array" section or "FRRouting/XREF" note.
Signed-off-by: David Lamparter <equinox@diac24.net>
-rwxr-xr-x | configure.ac | 14 | ||||
-rw-r--r-- | debian/control | 1 | ||||
-rw-r--r-- | lib/clippy.h | 2 | ||||
-rw-r--r-- | lib/command_py.c | 2 | ||||
-rw-r--r-- | lib/elf_py.c | 1301 | ||||
-rw-r--r-- | lib/subdir.am | 4 |
6 files changed, 1323 insertions, 1 deletions
diff --git a/configure.ac b/configure.ac index f3d1f3898..139fca7c4 100755 --- a/configure.ac +++ b/configure.ac @@ -794,6 +794,20 @@ fi # AS_IF([test "$host" = "$build"], [ + AC_CHECK_HEADER([gelf.h], [], [ + AC_MSG_ERROR([libelf headers are required for building clippy. (Host only when cross-compiling.)]) + ]) + AC_CHECK_LIB([elf], [elf_memory], [], [ + AC_MSG_ERROR([libelf is required for building clippy. (Host only when cross-compiling.)]) + ]) + + AC_CHECK_LIB([elf], [elf_getdata_rawchunk], [ + AC_DEFINE([HAVE_ELF_GETDATA_RAWCHUNK], [1], [Have elf_getdata_rawchunk()]) + ]) + AC_CHECK_LIB([elf], [gelf_getnote], [ + AC_DEFINE([HAVE_GELF_GETNOTE], [1], [Have gelf_getnote()]) + ]) + FRR_PYTHON_DEV ], [ FRR_PYTHON diff --git a/debian/control b/debian/control index b9e96b55d..7a08cbbdb 100644 --- a/debian/control +++ b/debian/control @@ -13,6 +13,7 @@ Build-Depends: bison, install-info, libc-ares-dev, libcap-dev, + libelf-dev, libjson-c-dev | libjson0-dev, libpam0g-dev | libpam-dev, libpcre3-dev, diff --git a/lib/clippy.h b/lib/clippy.h index be4db6e63..95af27410 100644 --- a/lib/clippy.h +++ b/lib/clippy.h @@ -20,6 +20,7 @@ #ifndef _FRR_CLIPPY_H #define _FRR_CLIPPY_H +#include <stdbool.h> #include <Python.h> #ifdef __cplusplus @@ -28,6 +29,7 @@ extern "C" { extern PyObject *clippy_parse(PyObject *self, PyObject *args); extern PyMODINIT_FUNC command_py_init(void); +extern bool elf_py_init(PyObject *pymod); #ifdef __cplusplus } diff --git a/lib/command_py.c b/lib/command_py.c index 4ec116df3..7f19008fb 100644 --- a/lib/command_py.c +++ b/lib/command_py.c @@ -345,5 +345,7 @@ PyMODINIT_FUNC command_py_init(void) PyModule_AddObject(pymod, "GraphNode", (PyObject *)&typeobj_graph_node); Py_INCREF(&typeobj_graph); PyModule_AddObject(pymod, "Graph", (PyObject *)&typeobj_graph); + if (!elf_py_init(pymod)) + initret(NULL); initret(pymod); } diff --git a/lib/elf_py.c b/lib/elf_py.c new file mode 100644 index 000000000..0d8ad76e1 --- /dev/null +++ b/lib/elf_py.c @@ -0,0 +1,1301 @@ +/* + * fast ELF file accessor + * Copyright (C) 2018-2020 David Lamparter for NetDEF, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* Note: this wrapper is intended to be used as build-time helper. While + * it should be generally correct and proper, there may be the occasional + * memory leak or SEGV for things that haven't been well-tested. + * _ + * / \ This code is NOT SUITABLE FOR UNTRUSTED ELF FILES. It's used + * / ! \ in FRR to read files created by its own build. Don't take it out + * /_____\ of FRR and use it to parse random ELF files you found somewhere. + * + * If you're working with this code (or even reading it), you really need to + * read a bunch of the ELF specs. There's no way around it, things in here + * just represent pieces of ELF pretty much 1:1. Also, readelf & objdump are + * your friends. + * + * Required reading: + * https://refspecs.linuxfoundation.org/elf/elf.pdf + * https://refspecs.linuxfoundation.org/elf/x86_64-SysV-psABI.pdf + * Recommended reading: + * https://github.com/ARM-software/abi-aa/releases/download/2020Q4/aaelf64.pdf + * + * The core ELF spec is *not* enough, you should read at least one of the + * processor specific (psABI) docs. They define what & how relocations work. + * Luckily we don't need to care about the processor specifics since this only + * does data relocations, but without looking at the psABI, some things aren't + * quite clear. + */ + +/* the API of this module roughly follows a very small subset of the one + * provided by the python elfutils package, which unfortunately is painfully + * slow. + */ + +#define PY_SSIZE_T_CLEAN + +#include <Python.h> +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "structmember.h" +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> + +#if defined(__sun__) && (__SIZEOF_POINTER__ == 4) +/* Solaris libelf bails otherwise ... */ +#undef _FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 32 +#endif + +#include <elf.h> +#include <libelf.h> +#include <gelf.h> + +#include "typesafe.h" +#include "jhash.h" +#include "clippy.h" + +static bool debug; + +#define debugf(...) \ + do { \ + if (debug) \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) + +/* Exceptions */ +static PyObject *ELFFormatError; +static PyObject *ELFAccessError; + +/* most objects can only be created as return values from one of the methods */ +static PyObject *refuse_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyErr_SetString(PyExc_ValueError, + "cannot create instances of this type"); + return NULL; +} + +struct elfreloc; +struct elfsect; + +PREDECL_HASH(elfrelocs) + +/* ELFFile and ELFSection intentionally share some behaviour, particularly + * subscript[123:456] access to file data. This is because relocatables + * (.o files) do things section-based, but linked executables/libraries do + * things file-based. Having the two behave similar allows simplifying the + * Python code. + */ + +/* class ELFFile: + * + * overall entry point, instantiated by reading in an ELF file + */ +struct elffile { + PyObject_HEAD + + char *filename; + char *mmap, *mmend; + size_t len; + Elf *elf; + + /* note from here on there are several instances of + * + * GElf_Something *x, _x; + * + * this is a pattern used by libelf's generic ELF routines; the _x + * field is used to create a copy of the ELF structure from the file + * with 32/64bit and endianness adjusted. + */ + + GElf_Ehdr *ehdr, _ehdr; + Elf_Scn *symtab; + size_t nsym, symstridx; + Elf_Data *symdata; + + PyObject **sects; + size_t n_sect; + + struct elfrelocs_head dynrelocs; + + int elfclass; + bool bigendian; + bool has_symbols; +}; + +/* class ELFSection: + * + * note that executables and shared libraries can have their section headers + * removed, though in practice this is only used as an obfuscation technique. + */ +struct elfsect { + PyObject_HEAD + + const char *name; + struct elffile *ef; + + GElf_Shdr _shdr, *shdr; + Elf_Scn *scn; + unsigned long idx, len; + + struct elfrelocs_head relocs; +}; + +/* class ELFReloc: + * + * note: relocations in object files (.o) are section-based while relocations + * in executables and shared libraries are file-based. + * + * Whenever accessing something that is a pointer in the ELF file, the Python + * code needs to check for a relocation; if the pointer is pointing to some + * unresolved symbol the file will generally contain 0 bytes. The relocation + * will tell what the pointer is actually pointing to. + * + * This represents both static (.o file) and dynamic (.so/exec) relocations. + */ +struct elfreloc { + PyObject_HEAD + + struct elfrelocs_item elfrelocs_item; + + struct elfsect *es; + struct elffile *ef; + + /* there's also old-fashioned GElf_Rel; we're converting that to + * GElf_Rela in elfsect_add_relocations() + */ + GElf_Rela _rela, *rela; + GElf_Sym _sym, *sym; + size_t symidx; + const char *symname; + + /* documented below in python docstrings */ + bool symvalid, unresolved, relative; + unsigned long long st_value; +}; + +static int elfreloc_cmp(const struct elfreloc *a, const struct elfreloc *b); +static uint32_t elfreloc_hash(const struct elfreloc *reloc); + +DECLARE_HASH(elfrelocs, struct elfreloc, elfrelocs_item, + elfreloc_cmp, elfreloc_hash) + +static Elf_Scn *elf_find_addr(struct elffile *ef, uint64_t addr, size_t *idx); +static PyObject *elffile_secbyidx(struct elffile *w, Elf_Scn *scn, size_t idx); +static PyObject *elfreloc_getsection(PyObject *self, PyObject *args); +static PyObject *elfreloc_getaddend(PyObject *obj, void *closure); + +/* --- end of declarations -------------------------------------------------- */ + +/* + * class ELFReloc: + */ + +static const char elfreloc_doc[] = + "Represents an ELF relocation record\n" + "\n" + "(struct elfreloc * in elf_py.c)"; + +#define member(name, type, doc) \ + { \ + (char *)#name, type, offsetof(struct elfreloc, name), READONLY,\ + (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \ + } +static PyMemberDef members_elfreloc[] = { + member(symname, T_STRING, + "Name of symbol this relocation refers to.\n" + "\n" + "Will frequently be `None` in executables and shared libraries." + ), + member(symvalid, T_BOOL, + "Target symbol has a valid type, i.e. not STT_NOTYPE"), + member(unresolved, T_BOOL, + "Target symbol refers to an existing section"), + member(relative, T_BOOL, + "Relocation is a REL (not RELA) record and thus relative."), + member(st_value, T_ULONGLONG, + "Target symbol's value, if known\n\n" + "Will be zero for unresolved/external symbols."), + {} +}; +#undef member + +static PyGetSetDef getset_elfreloc[] = { + { .name = (char *)"r_addend", .get = elfreloc_getaddend, .doc = + (char *)"Relocation addend value"}, + {} +}; + +static PyMethodDef methods_elfreloc[] = { + {"getsection", elfreloc_getsection, METH_VARARGS, + "Find relocation target's ELF section\n\n" + "Args: address of relocatee (TODO: fix/remove?)\n" + "Returns: ELFSection or None\n\n" + "Not possible if section headers have been stripped."}, + {} +}; + +static int elfreloc_cmp(const struct elfreloc *a, const struct elfreloc *b) +{ + if (a->rela->r_offset < b->rela->r_offset) + return -1; + if (a->rela->r_offset > b->rela->r_offset) + return 1; + return 0; +} + +static uint32_t elfreloc_hash(const struct elfreloc *reloc) +{ + return jhash(&reloc->rela->r_offset, sizeof(reloc->rela->r_offset), + 0xc9a2b7f4); +} + +static struct elfreloc *elfrelocs_get(struct elfrelocs_head *head, + GElf_Addr offset) +{ + struct elfreloc dummy; + + dummy.rela = &dummy._rela; + dummy.rela->r_offset = offset; + return elfrelocs_find(head, &dummy); +} + +static PyObject *elfreloc_getsection(PyObject *self, PyObject *args) +{ + struct elfreloc *w = (struct elfreloc *)self; + long data; + + if (!PyArg_ParseTuple(args, "k", &data)) + return NULL; + + if (!w->es) + Py_RETURN_NONE; + + if (w->symidx == 0) { + size_t idx = 0; + Elf_Scn *scn; + + data = (w->relative ? data : 0) + w->rela->r_addend; + scn = elf_find_addr(w->es->ef, data, &idx); + if (!scn) + Py_RETURN_NONE; + return elffile_secbyidx(w->es->ef, scn, idx); + } + return elffile_secbyidx(w->es->ef, NULL, w->sym->st_shndx); +} + +static PyObject *elfreloc_getaddend(PyObject *obj, void *closure) +{ + struct elfreloc *w = (struct elfreloc *)obj; + + return Py_BuildValue("K", (unsigned long long)w->rela->r_addend); +} + +static PyObject *elfreloc_repr(PyObject *arg) +{ + struct elfreloc *w = (struct elfreloc *)arg; + + return PyUnicode_FromFormat("<ELFReloc @%lu %s+%lu>", + (unsigned long)w->rela->r_offset, + (w->symname && w->symname[0]) ? w->symname + : "[0]", + (unsigned long)w->rela->r_addend); +} + +static void elfreloc_free(void *arg) +{ + struct elfreloc *w = arg; + + (void)w; +} + +static PyTypeObject typeobj_elfreloc = { + PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFReloc", + .tp_basicsize = sizeof(struct elfreloc), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = elfreloc_doc, + .tp_new = refuse_new, + .tp_free = elfreloc_free, + .tp_repr = elfreloc_repr, + .tp_members = members_elfreloc, + .tp_methods = methods_elfreloc, + .tp_getset = getset_elfreloc, +}; + +/* + * class ELFSection: + */ + +static const char elfsect_doc[] = + "Represents an ELF section\n" + "\n" + "To access section contents, use subscript notation, e.g.\n" + " section[123:456]\n" + "To read null terminated C strings, replace the end with str:\n" + " section[123:str]\n\n" + "(struct elfsect * in elf_py.c)"; + +static PyObject *elfsect_getaddr(PyObject *self, void *closure); + +#define member(name, type, doc) \ + { \ + (char *)#name, type, offsetof(struct elfsect, name), READONLY, \ + (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \ + } +static PyMemberDef members_elfsect[] = { + member(name, T_STRING, + "Section name, e.g. \".text\""), + member(idx, T_ULONG, + "Section index in file"), + member(len, T_ULONG, + "Section length in bytes"), + {}, +}; +#undef member + +static PyGetSetDef getset_elfsect[] = { + { .name = (char *)"sh_addr", .get = elfsect_getaddr, .doc = + (char *)"Section virtual address (mapped program view)"}, + {} +}; + +static PyObject *elfsect_getaddr(PyObject *self, void *closure) +{ + struct elfsect *w = (struct elfsect *)self; + + return Py_BuildValue("K", (unsigned long long)w->shdr->sh_addr); +} + + +static PyObject *elfsect_getreloc(PyObject *self, PyObject *args) +{ + struct elfsect *w = (struct elfsect *)self; + struct elfreloc *relw; + unsigned long offs; + PyObject *ret; + + if (!PyArg_ParseTuple(args, "k", &offs)) + return NULL; + + relw = elfrelocs_get(&w->relocs, offs + w->shdr->sh_addr); + if (!relw) + Py_RETURN_NONE; + + ret = (PyObject *)relw; + Py_INCREF(ret); + return ret; +} + +static PyMethodDef methods_elfsect[] = { + {"getreloc", elfsect_getreloc, METH_VARARGS, + "Check for / get relocation at offset into section\n\n" + "Args: byte offset into section to check\n" + "Returns: ELFReloc or None"}, + {} +}; + +static PyObject *elfsect_subscript(PyObject *self, PyObject *key) +{ + Py_ssize_t start, stop, step, sllen; + struct elfsect *w = (struct elfsect *)self; + PySliceObject *slice; + unsigned long offs, len = ~0UL; + + if (!PySlice_Check(key)) { + PyErr_SetString(PyExc_IndexError, + "ELFSection subscript must be slice"); + return NULL; + } + slice = (PySliceObject *)key; + if (PyLong_Check(slice->stop)) { + if (PySlice_GetIndicesEx(key, w->shdr->sh_size, + &start, &stop, &step, &sllen)) + return NULL; + + if (step != 1) { + PyErr_SetString(PyExc_IndexError, + "ELFSection subscript slice step must be 1"); + return NULL; + } + if ((GElf_Xword)stop > w->shdr->sh_size) { + PyErr_Format(ELFAccessError, + "access (%lu) beyond end of section %lu/%s (%lu)", + stop, w->idx, w->name, w->shdr->sh_size); + return NULL; + } + + offs = start; + len = sllen; + } else { + if (slice->stop != (void *)&PyUnicode_Type + || !PyLong_Check(slice->start)) { + PyErr_SetString(PyExc_IndexError, "invalid slice"); + return NULL; + } + + offs = PyLong_AsUnsignedLongLong(slice->start); + len = ~0UL; + } + + offs += w->shdr->sh_offset; + if (offs > w->ef->len) { + PyErr_Format(ELFAccessError, + "access (%lu) beyond end of file (%lu)", + offs, w->ef->len); + return NULL; + } + if (len == ~0UL) + len = strnlen(w->ef->mmap + offs, w->ef->len - offs); + + Py_ssize_t pylen = len; + +#if PY_MAJOR_VERSION >= 3 + return Py_BuildValue("y#", w->ef->mmap + offs, pylen); +#else + return Py_BuildValue("s#", w->ef->mmap + offs, pylen); +#endif +} + +static PyMappingMethods mp_elfsect = { + .mp_subscript = elfsect_subscript, +}; + +static void elfsect_free(void *arg) +{ + struct elfsect *w = arg; + + (void)w; +} + +static PyObject *elfsect_repr(PyObject *arg) +{ + struct elfsect *w = (struct elfsect *)arg; + + return PyUnicode_FromFormat("<ELFSection %s>", w->name); +} + +static PyTypeObject typeobj_elfsect = { + PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFSection", + .tp_basicsize = sizeof(struct elfsect), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = elfsect_doc, + .tp_new = refuse_new, + .tp_free = elfsect_free, + .tp_repr = elfsect_repr, + .tp_as_mapping = &mp_elfsect, + .tp_members = members_elfsect, + .tp_methods = methods_elfsect, + .tp_getset = getset_elfsect, +}; + +static void elfsect_add_relocations(struct elfsect *w, Elf_Scn *rel, + GElf_Shdr *relhdr) +{ + size_t i, entries; + Elf_Scn *symtab = elf_getscn(w->ef->elf, relhdr->sh_link); + GElf_Shdr _symhdr, *symhdr = gelf_getshdr(symtab, &_symhdr); + Elf_Data *symdata = elf_getdata(symtab, NULL); + Elf_Data *reldata = elf_getdata(rel, NULL); + + entries = relhdr->sh_size / relhdr->sh_entsize; + for (i = 0; i < entries; i++) { + struct elfreloc *relw; + size_t symidx; + GElf_Rela *rela; + GElf_Sym *sym; + + relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc( + &typeobj_elfreloc, 0); + relw->es = w; + + if (relhdr->sh_type == SHT_REL) { + GElf_Rel _rel, *rel; + + rel = gelf_getrel(reldata, i, &_rel); + relw->rela = &relw->_rela; + relw->rela->r_offset = rel->r_offset; + relw->rela->r_info = rel->r_info; + relw->rela->r_addend = 0; + relw->relative = true; + } else + relw->rela = gelf_getrela(reldata, i, &relw->_rela); + + rela = relw->rela; + if (rela->r_offset < w->shdr->sh_addr + || rela->r_offset >= w->shdr->sh_addr + w->shdr->sh_size) + continue; + + symidx = relw->symidx = GELF_R_SYM(rela->r_info); + sym = relw->sym = gelf_getsym(symdata, symidx, &relw->_sym); + if (sym) { + relw->symname = elf_strptr(w->ef->elf, symhdr->sh_link, + sym->st_name); + relw->symvalid = GELF_ST_TYPE(sym->st_info) + != STT_NOTYPE; + relw->unresolved = sym->st_shndx == SHN_UNDEF; + relw->st_value = sym->st_value; + } else { + relw->symname = NULL; + relw->symvalid = false; + relw->unresolved = false; + relw->st_value = 0; + } + + debugf("reloc @ %016llx sym %5llu %016llx %s\n", + (long long)rela->r_offset, (unsigned long long)symidx, + (long long)rela->r_addend, relw->symname); + + elfrelocs_add(&w->relocs, relw); + } +} + +/* + * bindings & loading code between ELFFile and ELFSection + */ + +static PyObject *elfsect_wrap(struct elffile *ef, Elf_Scn *scn, size_t idx, + const char *name) +{ + struct elfsect *w; + size_t i; + + w = (struct elfsect *)typeobj_elfsect.tp_alloc(&typeobj_elfsect, 0); + if (!w) + return NULL; + + w->name = name; + w->ef = ef; + w->scn = scn; + w->shdr = gelf_getshdr(scn, &w->_shdr); + w->len = w->shdr->sh_size; + w->idx = idx; + elfrelocs_init(&w->relocs); + + for (i = 0; i < ef->ehdr->e_shnum; i++) { + Elf_Scn *scn = elf_getscn(ef->elf, i); + GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + + if (shdr->sh_type != SHT_RELA && shdr->sh_type != SHT_REL) + continue; + if (shdr->sh_info && shdr->sh_info != idx) + continue; + elfsect_add_relocations(w, scn, shdr); + } + + return (PyObject *)w; +} + +static Elf_Scn *elf_find_section(struct elffile *ef, const char *name, + size_t *idx) +{ + size_t i; + const char *secname; + + for (i = 0; i < ef->ehdr->e_shnum; i++) { + Elf_Scn *scn = elf_getscn(ef->elf, i); + GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + + secname = elf_strptr(ef->elf, ef->ehdr->e_shstrndx, + shdr->sh_name); + if (strcmp(secname, name)) + continue; + if (idx) + *idx = i; + return scn; + } + return NULL; +} + +static Elf_Scn *elf_find_addr(struct elffile *ef, uint64_t addr, size_t *idx) +{ + size_t i; + + for (i = 0; i < ef->ehdr->e_shnum; i++) { + Elf_Scn *scn = elf_getscn(ef->elf, i); + GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + + if (addr < shdr->sh_addr || + addr >= shdr->sh_addr + shdr->sh_size) + continue; + + if (idx) + *idx = i; + return scn; + } + return NULL; +} + +/* + * class ELFFile: + */ + +static const char elffile_doc[] = + "Represents an ELF file\n" + "\n" + "Args: filename to load\n" + "\n" + "To access raw file contents, use subscript notation, e.g.\n" + " file[123:456]\n" + "To read null terminated C strings, replace the end with str:\n" + " file[123:str]\n\n" + "(struct elffile * in elf_py.c)"; + + +#define member(name, type, doc) \ + { \ + (char *)#name, type, offsetof(struct elffile, name), READONLY, \ + (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \ + } +static PyMemberDef members_elffile[] = { + member(filename, T_STRING, + "Original file name as given when opening"), + member(elfclass, T_INT, + "ELF class (architecture bit size)\n\n" + "Either 32 or 64, straight integer."), + member(bigendian, T_BOOL, + "ELF file is big-endian\n\n" + "All internal ELF structures are automatically converted."), + member(has_symbols, T_BOOL, + "A symbol section is present\n\n" + "Note: only refers to .symtab/SHT_SYMTAB section, not DT_SYMTAB" + ), + {}, +}; +#undef member + +static PyObject *elffile_secbyidx(struct elffile *w, Elf_Scn *scn, size_t idx) +{ + const char *name; + PyObject *ret; + + if (!scn) + scn = elf_getscn(w->elf, idx); + if (!scn || idx >= w->n_sect) + Py_RETURN_NONE; + + if (!w->sects[idx]) { + GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + + name = elf_strptr(w->elf, w->ehdr->e_shstrndx, shdr->sh_name); + w->sects[idx] = elfsect_wrap(w, scn, idx, name); + } + + ret = w->sects[idx]; + Py_INCREF(ret); + return ret; +} + +static PyObject *elffile_get_section(PyObject *self, PyObject *args) +{ + const char *name; + struct elffile *w = (struct elffile *)self; + Elf_Scn *scn; + size_t idx = 0; + + if (!PyArg_ParseTuple(args, "s", &name)) + return NULL; + + scn = elf_find_section(w, name, &idx); + return elffile_secbyidx(w, scn, idx); +} + +static PyObject *elffile_get_section_addr(PyObject *self, PyObject *args) +{ + unsigned long long addr; + struct elffile *w = (struct elffile *)self; + Elf_Scn *scn; + size_t idx = 0; + + if (!PyArg_ParseTuple(args, "K", &addr)) + return NULL; + + scn = elf_find_addr(w, addr, &idx); + return elffile_secbyidx(w, scn, idx); +} + +static PyObject *elffile_get_section_idx(PyObject *self, PyObject *args) +{ + unsigned long long idx; + struct elffile *w = (struct elffile *)self; + + if (!PyArg_ParseTuple(args, "K", &idx)) + return NULL; + + return elffile_secbyidx(w, NULL, idx); +} + +static PyObject *elffile_get_symbol(PyObject *self, PyObject *args) +{ + const char *name, *symname; + struct elffile *w = (struct elffile *)self; + GElf_Sym _sym, *sym; + size_t i; + + if (!PyArg_ParseTuple(args, "s", &name)) + return NULL; + + for (i = 0; i < w->nsym; i++) { + sym = gelf_getsym(w->symdata, i, &_sym); + if (sym->st_name == 0) + continue; + symname = elf_strptr(w->elf, w->symstridx, sym->st_name); + if (strcmp(symname, name)) + continue; + + PyObject *pysect; + Elf_Scn *scn = elf_getscn(w->elf, sym->st_shndx); + + if (scn) + pysect = elffile_secbyidx(w, scn, sym->st_shndx); + else { + pysect = Py_None; + Py_INCREF(pysect); + } + return Py_BuildValue("sKN", symname, + (unsigned long long)sym->st_value, pysect); + } + Py_RETURN_NONE; +} + +static PyObject *elffile_getreloc(PyObject *self, PyObject *args) +{ + struct elffile *w = (struct elffile *)self; + struct elfreloc *relw; + unsigned long offs; + PyObject *ret; + + if (!PyArg_ParseTuple(args, "k", &offs)) + return NULL; + + relw = elfrelocs_get(&w->dynrelocs, offs); + if (!relw) + Py_RETURN_NONE; + + ret = (PyObject *)relw; + Py_INCREF(ret); + return ret; +} + +static PyObject *elffile_find_note(PyObject *self, PyObject *args) +{ +#if defined(HAVE_GELF_GETNOTE) && defined(HAVE_ELF_GETDATA_RAWCHUNK) + const char *owner; + const uint8_t *ids; + GElf_Word id; + struct elffile *w = (struct elffile *)self; + size_t i; + + if (!PyArg_ParseTuple(args, "ss", &owner, &ids)) + return NULL; + + if (strlen((char *)ids) != 4) { + PyErr_SetString(PyExc_ValueError, + "ELF note ID must be exactly 4-byte string"); + return NULL; + } + if (w->bigendian) + id = (ids[0] << 24) | (ids[1] << 16) | (ids[2] << 8) | ids[3]; + else + id = (ids[3] << 24) | (ids[2] << 16) | (ids[1] << 8) | ids[0]; + + for (i = 0; i < w->ehdr->e_phnum; i++) { + GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); + Elf_Data *notedata; + size_t offset; + + if (phdr->p_type != PT_NOTE) + continue; + + notedata = elf_getdata_rawchunk(w->elf, phdr->p_offset, + phdr->p_filesz, ELF_T_NHDR); + + GElf_Nhdr nhdr[1]; + size_t nameoffs, dataoffs; + + offset = 0; + while ((offset = gelf_getnote(notedata, offset, nhdr, + &nameoffs, &dataoffs))) { + if (phdr->p_offset + nameoffs >= w->len) + continue; + + const char *name = w->mmap + phdr->p_offset + nameoffs; + + if (strcmp(name, owner)) + continue; + if (id != nhdr->n_type) + continue; + + PyObject *s, *e; + + s = PyLong_FromUnsignedLongLong( + phdr->p_vaddr + dataoffs); + e = PyLong_FromUnsignedLongLong( + phdr->p_vaddr + dataoffs + nhdr->n_descsz); + return PySlice_New(s, e, NULL); + } + } +#endif + Py_RETURN_NONE; +} + +static bool elffile_virt2file(struct elffile *w, GElf_Addr virt, + GElf_Addr *offs) +{ + *offs = 0; + + for (size_t i = 0; i < w->ehdr->e_phnum; i++) { + GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); + + if (phdr->p_type != PT_LOAD) + continue; + + if (virt < phdr->p_vaddr + || virt >= phdr->p_vaddr + phdr->p_memsz) + continue; + + if (virt >= phdr->p_vaddr + phdr->p_filesz) + return false; + + *offs = virt - phdr->p_vaddr + phdr->p_offset; + return true; + } + + return false; +} + +static PyObject *elffile_subscript(PyObject *self, PyObject *key) +{ + Py_ssize_t start, stop, step; + PySliceObject *slice; + struct elffile *w = (struct elffile *)self; + bool str = false; + + if (!PySlice_Check(key)) { + PyErr_SetString(PyExc_IndexError, + "ELFFile subscript must be slice"); + return NULL; + } + slice = (PySliceObject *)key; + stop = -1; + step = 1; + if (PyLong_Check(slice->stop)) { + start = PyLong_AsSsize_t(slice->start); + if (PyErr_Occurred()) + return NULL; + if (slice->stop != Py_None) { + stop = PyLong_AsSsize_t(slice->stop); + if (PyErr_Occurred()) + return NULL; + } + if (slice->step != Py_None) { + step = PyLong_AsSsize_t(slice->step); + if (PyErr_Occurred()) + return NULL; + } + } else { + if (slice->stop != (void *)&PyUnicode_Type + || !PyLong_Check(slice->start)) { + PyErr_SetString(PyExc_IndexError, "invalid slice"); + return NULL; + } + + str = true; + start = PyLong_AsUnsignedLongLong(slice->start); + } + if (step != 1) { + PyErr_SetString(PyExc_IndexError, + "ELFFile subscript slice step must be 1"); + return NULL; + } + + GElf_Addr xstart = start, xstop = stop; + + for (size_t i = 0; i < w->ehdr->e_phnum; i++) { + GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); + + if (phdr->p_type != PT_LOAD) + continue; + + if (xstart < phdr->p_vaddr + || xstart >= phdr->p_vaddr + phdr->p_memsz) + continue; + if (!str && (xstop < phdr->p_vaddr + || xstop > phdr->p_vaddr + phdr->p_memsz)) { + PyErr_Format(ELFAccessError, + "access (%llu) beyond end of program header (%llu)", + (long long)xstop, + (long long)(phdr->p_vaddr + + phdr->p_memsz)); + return NULL; + } + + xstart = xstart - phdr->p_vaddr + phdr->p_offset; + + if (str) + xstop = strlen(w->mmap + xstart); + else + xstop = xstop - phdr->p_vaddr + phdr->p_offset; + + Py_ssize_t pylen = xstop - xstart; + +#if PY_MAJOR_VERSION >= 3 + return Py_BuildValue("y#", w->mmap + xstart, pylen); +#else + return Py_BuildValue("s#", w->mmap + xstart, pylen); +#endif + }; + + return PyErr_Format(ELFAccessError, + "virtual address (%llu) not found in program headers", + (long long)start); +} + +static PyMethodDef methods_elffile[] = { + {"find_note", elffile_find_note, METH_VARARGS, + "find specific note entry"}, + {"getreloc", elffile_getreloc, METH_VARARGS, + "find relocation"}, + {"get_symbol", elffile_get_symbol, METH_VARARGS, + "find symbol by name"}, + {"get_section", elffile_get_section, METH_VARARGS, + "find section by name"}, + {"get_section_addr", elffile_get_section_addr, METH_VARARGS, + "find section by address"}, + {"get_section_idx", elffile_get_section_idx, METH_VARARGS, + "find section by index"}, + {} +}; + +static PyObject *elffile_load(PyTypeObject *type, PyObject *args, + PyObject *kwds); + +static void elffile_free(void *arg) +{ + struct elffile *w = arg; + + elf_end(w->elf); + munmap(w->mmap, w->len); + free(w->filename); +} + +static PyMappingMethods mp_elffile = { + .mp_subscript = elffile_subscript, +}; + +static PyTypeObject typeobj_elffile = { + PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFFile", + .tp_basicsize = sizeof(struct elffile), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = elffile_doc, + .tp_new = elffile_load, + .tp_free = elffile_free, + .tp_as_mapping = &mp_elffile, + .tp_members = members_elffile, + .tp_methods = methods_elffile, +}; + +static char *elfdata_strptr(Elf_Data *data, size_t offset) +{ + char *p; + + if (offset >= data->d_size) + return NULL; + + p = (char *)data->d_buf + offset; + if (strnlen(p, data->d_size - offset) >= data->d_size - offset) + return NULL; + + return p; +} + +static void elffile_add_dynreloc(struct elffile *w, Elf_Data *reldata, + size_t entries, Elf_Data *symdata, + Elf_Data *strdata) +{ + size_t i; + + for (i = 0; i < entries; i++) { + struct elfreloc *relw; + size_t symidx; + GElf_Rela *rela; + GElf_Sym *sym; + + relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc( + &typeobj_elfreloc, 0); + relw->ef = w; + + rela = relw->rela = gelf_getrela(reldata, i, &relw->_rela); + symidx = relw->symidx = GELF_R_SYM(rela->r_info); + sym = relw->sym = gelf_getsym(symdata, symidx, &relw->_sym); + if (sym) { + relw->symname = elfdata_strptr(strdata, sym->st_name); + relw->symvalid = GELF_ST_TYPE(sym->st_info) + != STT_NOTYPE; + relw->unresolved = sym->st_shndx == SHN_UNDEF; + relw->st_value = sym->st_value; + } else { + relw->symname = NULL; + relw->symvalid = false; + relw->unresolved = false; + relw->st_value = 0; + } + + debugf("dynreloc @ %016llx sym %5llu %016llx %s\n", + (long long)rela->r_offset, (unsigned long long)symidx, + (long long)rela->r_addend, relw->symname); + + elfrelocs_add(&w->dynrelocs, relw); + } + +} + +/* primary (only, really) entry point to anything in this module */ +static PyObject *elffile_load(PyTypeObject *type, PyObject *args, + PyObject *kwds) +{ + const char *filename; + static const char * const kwnames[] = {"filename", NULL}; + struct elffile *w; + struct stat st; + int fd, err; + + w = (struct elffile *)typeobj_elffile.tp_alloc(&typeobj_elffile, 0); + if (!w) + return NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", (char **)kwnames, + &filename)) + return NULL; + + w->filename = strdup(filename); + fd = open(filename, O_RDONLY | O_NOCTTY); + if (fd < 0 || fstat(fd, &st)) { + PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename); + close(fd); + goto out; + } + w->len = st.st_size; + w->mmap = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + if (!w->mmap) { + PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename); + close(fd); + goto out; + } + close(fd); + w->mmend = w->mmap + st.st_size; + + if (w->len < EI_NIDENT || memcmp(w->mmap, ELFMAG, SELFMAG)) { + PyErr_SetString(ELFFormatError, "invalid ELF signature"); + goto out; + } + + switch (w->mmap[EI_CLASS]) { + case ELFCLASS32: + w->elfclass = 32; + break; + case ELFCLASS64: + w->elfclass = 64; + break; + default: + PyErr_SetString(ELFFormatError, "invalid ELF class"); + goto out; + } + switch (w->mmap[EI_DATA]) { + case ELFDATA2LSB: + w->bigendian = false; + break; + case ELFDATA2MSB: + w->bigendian = true; + break; + default: + PyErr_SetString(ELFFormatError, "invalid ELF byte order"); + goto out; + } + + w->elf = elf_memory(w->mmap, w->len); + if (!w->elf) + goto out_elferr; + w->ehdr = gelf_getehdr(w->elf, &w->_ehdr); + if (!w->ehdr) + goto out_elferr; + + for (size_t i = 0; i < w->ehdr->e_shnum; i++) { + Elf_Scn *scn = elf_getscn(w->elf, i); + GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + + if (shdr->sh_type == SHT_SYMTAB) { + w->symtab = scn; + w->nsym = shdr->sh_size / shdr->sh_entsize; + w->symdata = elf_getdata(scn, NULL); + w->symstridx = shdr->sh_link; + break; + } + } + w->has_symbols = w->symtab && w->symstridx; + elfrelocs_init(&w->dynrelocs); + +#ifdef HAVE_ELF_GETDATA_RAWCHUNK + for (size_t i = 0; i < w->ehdr->e_phnum; i++) { + GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); + + if (phdr->p_type != PT_DYNAMIC) + continue; + + Elf_Data *dyndata = elf_getdata_rawchunk(w->elf, + phdr->p_offset, phdr->p_filesz, ELF_T_DYN); + + GElf_Addr dynrela = 0, symtab = 0, strtab = 0; + size_t dynrelasz = 0, dynrelaent = 0, strsz = 0; + GElf_Dyn _dyn, *dyn; + + for (size_t j = 0;; j++) { + dyn = gelf_getdyn(dyndata, j, &_dyn); + + if (dyn->d_tag == DT_NULL) + break; + + switch (dyn->d_tag) { + case DT_SYMTAB: + symtab = dyn->d_un.d_ptr; + break; + + case DT_STRTAB: + strtab = dyn->d_un.d_ptr; + break; + case DT_STRSZ: + strsz = dyn->d_un.d_val; + break; + + case DT_RELA: + dynrela = dyn->d_un.d_ptr; + break; + case DT_RELASZ: + dynrelasz = dyn->d_un.d_val; + break; + case DT_RELAENT: + dynrelaent = dyn->d_un.d_val; + break; + + case DT_RELSZ: + if (dyn->d_un.d_val) + fprintf(stderr, + "WARNING: ignoring non-empty DT_REL!\n"); + break; + } + } + + GElf_Addr offset; + Elf_Data *symdata = NULL, *strdata = NULL, *reladata = NULL; + + if (elffile_virt2file(w, symtab, &offset)) + symdata = elf_getdata_rawchunk(w->elf, offset, + w->len - offset, + ELF_T_SYM); + if (elffile_virt2file(w, strtab, &offset)) + strdata = elf_getdata_rawchunk(w->elf, offset, + strsz, ELF_T_BYTE); + + if (!dynrela || !dynrelasz || !dynrelaent) + continue; + + if (!elffile_virt2file(w, dynrela, &offset)) + continue; + + debugf("dynrela @%llx/%llx+%llx\n", (long long)dynrela, + (long long)offset, (long long)dynrelasz); + + reladata = elf_getdata_rawchunk(w->elf, offset, dynrelasz, + ELF_T_RELA); + elffile_add_dynreloc(w, reladata, dynrelasz / dynrelaent, + symdata, strdata); + } +#endif + + w->sects = calloc(sizeof(PyObject *), w->ehdr->e_shnum); + w->n_sect = w->ehdr->e_shnum; + + return (PyObject *)w; + +out_elferr: + err = elf_errno(); + + PyErr_Format(ELFFormatError, "libelf error %d: %s", + err, elf_errmsg(err)); +out: + if (w->elf) + elf_end(w->elf); + free(w->filename); + return NULL; +} + +static PyObject *elfpy_debug(PyObject *self, PyObject *args) +{ + int arg; + + if (!PyArg_ParseTuple(args, "p", &arg)) + return NULL; + + debug = arg; + + Py_RETURN_NONE; +} + +static PyMethodDef methods_elfpy[] = { + {"elfpy_debug", elfpy_debug, METH_VARARGS, "switch debuging on/off"}, + {} +}; + +bool elf_py_init(PyObject *pymod) +{ + if (PyType_Ready(&typeobj_elffile) < 0) + return false; + if (PyType_Ready(&typeobj_elfsect) < 0) + return false; + if (PyType_Ready(&typeobj_elfreloc) < 0) + return false; + if (elf_version(EV_CURRENT) == EV_NONE) + return false; + +#if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 5 + PyModule_AddFunctions(pymod, methods_elfpy); +#else + (void)methods_elfpy; +#endif + + ELFFormatError = PyErr_NewException("_clippy.ELFFormatError", + PyExc_ValueError, NULL); + PyModule_AddObject(pymod, "ELFFormatError", ELFFormatError); + ELFAccessError = PyErr_NewException("_clippy.ELFAccessError", + PyExc_IndexError, NULL); + PyModule_AddObject(pymod, "ELFAccessError", ELFAccessError); + + Py_INCREF(&typeobj_elffile); + PyModule_AddObject(pymod, "ELFFile", (PyObject *)&typeobj_elffile); + Py_INCREF(&typeobj_elfsect); + PyModule_AddObject(pymod, "ELFSection", (PyObject *)&typeobj_elfsect); + Py_INCREF(&typeobj_elfreloc); + PyModule_AddObject(pymod, "ELFReloc", (PyObject *)&typeobj_elfreloc); + return true; +} diff --git a/lib/subdir.am b/lib/subdir.am index d5ffa0854..15c649214 100644 --- a/lib/subdir.am +++ b/lib/subdir.am @@ -410,7 +410,7 @@ lib_grammar_sandbox_LDADD = \ lib_clippy_CPPFLAGS = $(AM_CPPFLAGS) -D_GNU_SOURCE -DBUILDING_CLIPPY lib_clippy_CFLAGS = $(PYTHON_CFLAGS) -lib_clippy_LDADD = $(PYTHON_LIBS) $(UST_LIBS) +lib_clippy_LDADD = $(PYTHON_LIBS) $(UST_LIBS) -lelf lib_clippy_LDFLAGS = -export-dynamic lib_clippy_SOURCES = \ lib/jhash.c \ @@ -420,9 +420,11 @@ lib_clippy_SOURCES = \ lib/command_parse.y \ lib/command_py.c \ lib/defun_lex.l \ + lib/elf_py.c \ lib/graph.c \ lib/libfrr_trace.c \ lib/memory.c \ + lib/typesafe.c \ lib/vector.c \ # end |