Iii. analysis of load_elf_binary function

0 18
First, source code version1) Version: V6.3-rc7, x862) Source code of elf file lo...

First, source code version

1) Version: V6.3-rc7, x86
2) Source code of elf file loading: fs/binfmt_elf.c

Second, Linux executable file registration

Linux supports several different executable file formats. How each format is loaded is described by a linux_binfmt structure, which is declared in include/linux/binfmts.h:

Definition of struct linux_binfmt:
/*
 * Describes one executable-format handler. Each handler supplies the
 * callbacks used to load a program, load a shared library and (when
 * CONFIG_COREDUMP is enabled) write a core dump for that format.
 */
struct linux_binfmt {
    struct list_head lh;    /* linkage in the list of registered formats */
    struct module *module;
    /* Load the executable and set up a new execution environment. */
    int (*load_binary)(struct linux_binprm *);
    /* Load a shared library into the existing process. */
    int (*load_shlib)(struct file *);
#ifdef CONFIG_COREDUMP
    /* Write the current process's execution context to a core file. */
    int (*core_dump)(struct coredump_params *cprm);
    unsigned long min_coredump; /* minimal dump size */
#endif
} __randomize_layout;

The structure defines three different loading modes for executable programs:

Loading mode | Note
load_binary  | Read the content of the executable file and set up a new execution environment for the current process
load_shlib   | Dynamically load a shared library into an existing process
core_dump    | Store the current process's execution context in a core file

Each executable format supported by the system corresponds to one linux_binfmt object. These objects are kept in a linked list that is maintained by the register_binfmt and unregister_binfmt functions. When executing a program, the kernel walks the registered linux_binfmt objects with list_for_each_entry and loads the file with the handler that accepts it.
The linux_binfmt object for ELF files is shown below; it specifies that ELF executables are loaded by the load_elf_binary function:

/*
 * Format descriptor for ELF files: binds the ELF loader routines to the
 * linux_binfmt callbacks.
 */
static struct linux_binfmt elf_format = {
    .module = THIS_MODULE,
    .load_binary = load_elf_binary,     /* executable loader */
    .load_shlib = load_elf_library,     /* shared-library loader */
#ifdef CONFIG_COREDUMP
    .core_dump = elf_core_dump,         /* core file writer */
    .min_coredump = ELF_EXEC_PAGESIZE,  /* minimal dump size */
#endif
};

Iii. analysis of load_elf_binary function

1. File format verification

/* The start of bprm->buf is interpreted as the ELF header of the file. */
struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;

/* Every rejection below leaves with -ENOEXEC. */
retval = -ENOEXEC;
/* Do some simple consistency check first */
if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
    goto out;

/* Only ET_EXEC and ET_DYN objects are handled here. */
if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
    goto out;
if (!elf_check_arch(elf_ex))
    goto out;
if (elf_check_fdpic(elf_ex))
    goto out;
/* The backing file must provide an mmap operation. */
if (!bprm->file->f_op->mmap)
    goto out;

The program first reads the magic number in e_ident and checks it. e_ident is a 16-byte array at the very start of the ELF file; its layout is the same regardless of architecture or word size (32-bit or 64-bit). The first four bytes of e_ident are fixed at 0x7f 'E' 'L' 'F', so comparing these bytes is enough to tell whether the file is an ELF file.
Then identify whether the file is an executable file or a dynamically linked file. At present, ELF files mainly have four formats, namely, relocatable file (ET_REL), executable file (ET_EXEC), shared object file (ET_DYN) and core file (ET_CORE). The load_elf_binary function is only responsible for parsing exec and dyn files.
Finally, the necessary items such as file-dependent system architecture are analyzed.

2. Read the program header

/**
 * load_elf_phdrs() - read the program header table of an ELF file
 * @elf_ex:   ELF header supplying e_phentsize, e_phnum and e_phoff
 * @elf_file: file the program headers are read from via elf_read()
 *
 * Validates the program header entry size and the total table size,
 * allocates a buffer with kmalloc() and fills it with elf_read().
 *
 * Return: the allocated program header array on success; NULL if a
 * sanity check fails, the allocation fails, or elf_read() returns a
 * non-zero value. On success the buffer is not freed here, so the
 * caller is responsible for releasing it.
 */
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
                       struct file *elf_file)
{
    struct elf_phdr *elf_phdata = NULL;
    int retval = -1;
    unsigned int size;

    /*
     * If the size of this structure has changed, skip, because
     * we will be doing the wrong thing.
     */
    if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
        goto out;

    /* Sanity check the number of program headers... */
    /* ...and their total size. */
    size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
    if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
        goto out;

    elf_phdata = kmalloc(size, GFP_KERNEL);
    if (!elf_phdata)
        goto out;

    /* Read in the program headers */
    retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);

out:
    /* Any non-zero retval (including the initial -1) means failure. */
    if (retval) {
        kfree(elf_phdata);
        elf_phdata = NULL;
    }
    return elf_phdata;
}

The program header describes the target file structure information directly related to program execution, used to locate the images of various sections in the file, and also contains other information necessary for creating the process image of the program.

3. Read the interpreter section

/*
 * Walk the program headers: remember the PT_GNU_PROPERTY header if one
 * is present, and on the first PT_INTERP header read the interpreter
 * path, open it and read its ELF header, then stop scanning.
 */
elf_ppnt = elf_phdata;
for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
    char *elf_interpreter;

    if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
        elf_property_phdata = elf_ppnt;
        continue;
    }

    if (elf_ppnt->p_type != PT_INTERP)
        continue;

    /*
     * This is the program interpreter used for shared libraries -
     * for now assume that this is an a.out format binary.
     */
    retval = -ENOEXEC;
    /* The path must hold at least one character plus the terminator. */
    if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
        goto out_free_ph;

    retval = -ENOMEM;
    elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
    if (!elf_interpreter)
        goto out_free_ph;

    retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
              elf_ppnt->p_offset);
    if (retval < 0)
        goto out_free_interp;
    /* make sure path is NUL terminated */
    retval = -ENOEXEC;
    if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
        goto out_free_interp;

    interpreter = open_exec(elf_interpreter);
    /* The path buffer is no longer needed once the file is opened. */
    kfree(elf_interpreter);
    retval = PTR_ERR(interpreter);
    if (IS_ERR(interpreter))
        goto out_free_ph;

    /*
     * If the binary is not readable then enforce mm->dumpable = 0
     * regardless of the interpreter's permissions.
     */
    would_dump(bprm, interpreter);

    interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
    if (!interp_elf_ex) {
        retval = -ENOMEM;
        goto out_free_file;
    }

    /* Get the exec headers */
    retval = elf_read(interpreter, interp_elf_ex,
              sizeof(*interp_elf_ex), 0);
    if (retval < 0)
        goto out_free_dentry;

    break;

out_free_interp:
    kfree(elf_interpreter);
    goto out_free_ph;
}

If the program requires dynamic linking, it needs to load the interpreter segment (PT_INTERP), traverse all program headers, identify the interpreter segment, and read the content of this segment. The interpreter segment is actually a string indicating the path to the interpreter program file, and the kernel uses the open_exec function to open the interpreter based on the file pointed to by the string.

4. Obtain executable stack attributes and other custom information

/*
 * Second pass over the program headers: pick up the stack
 * executability request and hand processor-specific headers to the
 * architecture hook.
 */
elf_ppnt = elf_phdata;
for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
    switch (elf_ppnt->p_type) {
    case PT_GNU_STACK:
        /* PF_X in p_flags selects an executable stack. */
        executable_stack = (elf_ppnt->p_flags & PF_X) ?
                EXSTACK_ENABLE_X : EXSTACK_DISABLE_X;
        break;

    case PT_LOPROC ... PT_HIPROC:
        retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
                      bprm->file, false,
                      &arch_state);
        if (retval)
            goto out_free_dentry;
        break;
    }
}

Similarly, the program headers are traversed in a for loop. If the stack attribute segment (PT_GNU_STACK) is found, the executable attribute of the stack is determined from the PF_X bit of p_flags in that program header. If a processor-specific segment (type in the range PT_LOPROC to PT_HIPROC) is found, the arch_elf_pt_proc function is called to perform the corresponding configuration.

5. Read the interpreter

/* Validate the interpreter's ELF header and load its program headers. */
if (interpreter) {
    retval = -ELIBBAD;
    /* Not an ELF interpreter */
    if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
        goto out_free_dentry;
    /* Verify the interpreter has a valid arch */
    if (!elf_check_arch(interp_elf_ex) ||
        elf_check_fdpic(interp_elf_ex))
        goto out_free_dentry;

    /* Load the interpreter program headers */
    interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
                       interpreter);
    if (!interp_elf_phdata)
        goto out_free_dentry;
    /* (excerpt ends here; the if (interpreter) body is not closed) */

The interpreter is also an elf file, here the interpreter is read for subsequent operations

6. Load program segments

/* Visit every program header and process only the PT_LOAD entries. */
for (i = 0, elf_ppnt = elf_phdata;
     i < elf_ex->e_phnum; i++, elf_ppnt++) {
    int elf_prot, elf_flags;
    unsigned long k, vaddr;
    unsigned long total_size = 0;
    unsigned long alignment;

    if (elf_ppnt->p_type != PT_LOAD)
        continue;
    /* (excerpt ends here; the loop body continues) */

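All segments of type PT_LOAD are loaded. When the first PT_LOAD segment is processed and the file is of type ET_DYN, a load bias (base address offset) must also be chosen, and this is where address randomization is applied. Two cases are distinguished: if the program names an interpreter, the load bias starts at ELF_ET_DYN_BASE (plus a random offset when PF_RANDOMIZE is set, then rounded down to the maximum segment alignment) so that the program does not conflict with the interpreter's mapping; if there is no interpreter, the load bias starts at 0.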

if (! first_pt_load) {
    elf_flags |= MAP_FIXED;
}
    elf_flags |= MAP_FIXED_NOREPLACE;
}
    if (interpreter) {
        load_bias = ELF_ET_DYN_BASE;
        if (current->flags & PF_RANDOMIZE)
            load_bias += arch_mmap_rnd();
        alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
        if (alignment)
            load_bias &= ~(alignment - 1);
        elf_flags |= MAP_FIXED_NOREPLACE;
    }
        load_bias = 0;
 
    load_bias = ELF_PAGESTART(load_bias - vaddr);
    total_size = total_mapping_size(elf_phdata,
                    elf_ex->e_phnum);
    if (! total_size) {
        retval = -EINVAL;
        goto out_free_dentry;
    }
}

After everything is ready, establish a mapping between the user space virtual address space and the segments in the target image file through the elf_map function

/*
 * Map this segment of bprm->file at load_bias + vaddr with the
 * protection and flags computed above; the result is kept in error.
 */
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                elf_prot, elf_flags, total_size);

7. Load program entry address

if (interpreter) {
        elf_entry = load_elf_interp(interp_elf_ex,
                        interpreter,
                        load_bias, interp_elf_phdata,
                        &arch_state);
        if (! IS_ERR_VALUE(elf_entry)) {
            /*
             * load_elf_interp() returns relocation
             * adjustment
             */
            interp_load_addr = elf_entry;
            elf_entry += interp_elf_ex->e_entry;
        }
        if (BAD_ADDR(elf_entry)) {
            retval = IS_ERR_VALUE(elf_entry) ?
                    (int)elf_entry : -EINVAL;
            goto out_free_dentry;
        }
        reloc_func_desc = interp_load_addr;
 
        allow_write_access(interpreter);
        fput(interpreter);
 
        kfree(interp_elf_ex);
        kfree(interp_elf_phdata);
    }
        elf_entry = e_entry;
        if (BAD_ADDR(elf_entry)) {
            retval = -EINVAL;
            goto out_free_dentry;
        }
    }

For programs that require an interpreter, the interpreter image must be loaded first through the load_elf_interp function, and the program entry point must be set to the interpreter's entry address. For files that do not require an interpreter, the entry point virtual address in elf_header can be read directly.

8. Add parameter and environment variable configuration information

/* Build the ELF tables for the new image; a negative result aborts the load. */
retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
               e_entry, phdr_addr);
if (retval < 0) {
    goto out;
}

/* Record the code and data boundaries in the current process's mm. */
mm = current->mm;
mm->start_code = start_code;
mm->end_code = end_code;
mm->start_data = start_data;
你可能想看:
最后修改时间:
admin
上一篇 2025年03月30日 04:29
下一篇 2025年03月30日 04:51

评论已关闭