First, source code version
1) Version: V6.3-rc7, x86
2) Source code of elf file loading: fs/binfmt_elf.c
Second, Linux executable file registration
Linux supports several executable file formats. How each format is loaded is described by the linux_binfmt structure, defined in include/linux/binfmts.h:

/*
 * Describes one executable format known to the kernel and the handlers
 * used to load it. One object exists per supported format; the objects
 * are kept on a linked list edited by register_binfmt() and
 * unregister_binfmt().
 */
struct linux_binfmt {
	struct list_head lh;		/* linkage into the list of registered formats */
	struct module *module;		/* elf_format sets this to THIS_MODULE */
	/* Read the executable and set up a new execution environment for the current process. */
	int (*load_binary)(struct linux_binprm *);
	/* Dynamically load a shared library into an existing process. */
	int (*load_shlib)(struct file *);
#ifdef CONFIG_COREDUMP
	/* Store the current process's execution context into a core file. */
	int (*core_dump)(struct coredump_params *cprm);
	unsigned long min_coredump;	/* minimal dump size */
#endif
} __randomize_layout;	/* attribute macro belongs to the struct: no ';' before it */
The structure defines three different loading modes for executable programs:
Loading Mode | Note |
---|---|
load_binary | Read the content of the executable file and load a new execution environment for the current process |
load_shlib | Dynamically load shared libraries into an existing process |
core_dump | Store the current process's execution context into the core file |
Each executable format supported by the system corresponds to a linux_binfmt object. These objects are registered in a linked list, which is edited by the register_binfmt and unregister_binfmt functions. When executing a program, the kernel traverses the registered linux_binfmt objects with list_for_each_entry and loads the file using the handler of the matching format.
The linux_binfmt object for ELF files is shown below. It specifies that ELF executables are loaded by the load_elf_binary function:
/*
 * linux_binfmt descriptor for the ELF format: points each handler slot at
 * the ELF-specific implementation.
 */
static struct linux_binfmt elf_format = {
.module = THIS_MODULE,
/* Loads an ELF executable into the current process. */
.load_binary = load_elf_binary,
/* Loads an ELF shared library into an existing process. */
.load_shlib = load_elf_library,
#ifdef CONFIG_COREDUMP
/* Writes the process's execution context to a core file. */
.core_dump = elf_core_dump,
/* Minimal dump size. */
.min_coredump = ELF_EXEC_PAGESIZE,
#endif
};
Third, analysis of the load_elf_binary function
1. File format verification
struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
retval = -ENOEXEC;
/* Do some simple consistency check first */
if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
goto out;
if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
goto out;
if (!elf_check_arch(elf_ex))
goto out;
if (elf_check_fdpic(elf_ex))
goto out;
if (!bprm->file->f_op->mmap)
goto out;
The program first reads the magic number in e_ident and checks it. e_ident is a 16-byte array at the very start of the ELF header; its layout is the same regardless of architecture or word size (32-bit or 64-bit). The first four bytes of e_ident are fixed at 0x7f, 'E', 'L', 'F', and checking these bytes determines whether the file is an ELF file.
Then identify whether the file is an executable file or a dynamically linked file. At present, ELF files mainly have four formats, namely, relocatable file (ET_REL), executable file (ET_EXEC), shared object file (ET_DYN) and core file (ET_CORE). The load_elf_binary function is only responsible for parsing exec and dyn files.
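Finally, the remaining prerequisites are checked: the file must target an architecture the kernel supports (elf_check_arch), must not be an FDPIC ELF file (elf_check_fdpic), and the underlying file must support mmap.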
2. Read the program header
/*
 * load_elf_phdrs() - read the program header table of an ELF file
 * @elf_ex:   ELF header of the file whose program headers are wanted
 * @elf_file: the opened ELF file to read from
 *
 * Reads elf_ex->e_phnum program headers, starting at file offset
 * elf_ex->e_phoff, into a freshly kmalloc()ed array.
 *
 * Returns the array on success; the caller owns it and must kfree() it.
 * Returns NULL if the header entry size is unexpected, the table size is
 * zero or too large, the allocation fails, or the read fails.
 */
static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
				       struct file *elf_file)
{
	struct elf_phdr *elf_phdata = NULL;
	int retval = -1;	/* stays non-zero unless the read below succeeds */
	unsigned int size;

	/*
	 * If the size of this structure has changed, skip, because
	 * we will be doing the wrong thing.
	 */
	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;

	/* Sanity check the number of program headers... */
	/* ...and their total size. */
	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
	if (size == 0 || size > 65536 || size > ELF_MIN_ALIGN)
		goto out;

	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	/* Read in the program headers */
	retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);

out:
	/*
	 * Any non-zero retval is treated as failure. kfree(NULL) is a
	 * no-op, so this single path also serves the early exits taken
	 * before the allocation.
	 */
	if (retval) {
		kfree(elf_phdata);
		elf_phdata = NULL;
	}
	return elf_phdata;
}
The program header describes the target file structure information directly related to program execution, used to locate the images of various sections in the file, and also contains other information necessary for creating the process image of the program.
3. Read the interpreter section
elf_ppnt = elf_phdata;
for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
char *elf_interpreter;
if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
elf_property_phdata = elf_ppnt;
continue;
}
if (elf_ppnt->p_type != PT_INTERP)
continue;
/*
* This is the program interpreter used for shared libraries -
* For now, assume that this is an a.out format binary.
*/
retval = -ENOEXEC;
if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
goto out_free_ph;
retval = -ENOMEM;
elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
if (!elf_interpreter)
goto out_free_ph;
retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
elf_ppnt->p_offset);
if (retval < 0)
goto out_free_interp;
/* make sure path is NULL terminated */
retval = -ENOEXEC;
if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
goto out_free_interp;
interpreter = open_exec(elf_interpreter);
kfree(elf_interpreter);
retval = PTR_ERR(interpreter);
if (IS_ERR(interpreter))
goto out_free_ph;
/*
* If the binary is not readable then enforce mm->dumpable = 0
* regardless of the interpreter's permissions.
*/
would_dump(bprm, interpreter);
interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
if (!interp_elf_ex) {
retval = -ENOMEM;
goto out_free_file;
}
/* Get the exec headers */
retval = elf_read(interpreter, interp_elf_ex,
sizeof(*interp_elf_ex), 0);
if (retval < 0)
goto out_free_dentry;
break;
out_free_interp:
kfree(elf_interpreter);
goto out_free_ph;
}
If the program requires dynamic linking, it needs to load the interpreter segment (PT_INTERP), traverse all program headers, identify the interpreter segment, and read the content of this segment. The interpreter segment is actually a string indicating the path to the interpreter program file, and the kernel uses the open_exec function to open the interpreter based on the file pointed to by the string.
4. Obtain executable stack attributes and other custom information
elf_ppnt = elf_phdata;
for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
switch (elf_ppnt->p_type) {
case PT_GNU_STACK:
if (elf_ppnt->p_flags & PF_X)
executable_stack = EXSTACK_ENABLE_X;
else
executable_stack = EXSTACK_DISABLE_X;
break;
case PT_LOPROC ... PT_HIPROC:
retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
bprm->file, false,
&arch_state);
if (retval)
goto out_free_dentry;
break;
}
Similarly, the program headers are traversed in a for loop. If the stack attribute segment (PT_GNU_STACK) is found, the executable attribute of the stack is determined from the PF_X bit of p_flags in that program header. If a processor-specific segment (PT_LOPROC to PT_HIPROC) is found, the arch_elf_pt_proc function is called to complete the corresponding configuration.
5. Read the interpreter
if (interpreter) {
retval = -ELIBBAD;
/* Not an ELF interpreter */
if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
goto out_free_dentry;
/* Verify the interpreter has a valid arch */
if (!elf_check_arch(interp_elf_ex) ||
elf_check_fdpic(interp_elf_ex))
goto out_free_dentry;
/* Load the interpreter program headers */
interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
interpreter);
if (!interp_elf_phdata)
goto out_free_dentry;
The interpreter is itself an ELF file. Here its ELF header is validated and its program headers are read for use in the subsequent steps.
6. Load program segments
for(i = 0, elf_ppnt = elf_phdata;
i < elf_ex->e_phnum; i++, elf_ppnt++) {
int elf_prot, elf_flags;
unsigned long k, vaddr;
unsigned long total_size = 0;
unsigned long alignment;
if (elf_ppnt->p_type != PT_LOAD)
continue;
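All segments of type PT_LOAD are loaded. When the first PT_LOAD segment is processed and the file is of type ET_DYN, a load offset (load_bias) must also be chosen, and this is where address randomization is applied. The code distinguishes between ET_DYN binaries that request an interpreter (a PT_INTERP segment was found, so interpreter is non-NULL) and those that do not. For the former, to avoid address conflicts, the load offset starts from ELF_ET_DYN_BASE, is randomized if PF_RANDOMIZE is set, and is then aligned; for the latter, the load offset starts from 0.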
if (!first_pt_load) {
elf_flags |= MAP_FIXED;
} else if (elf_ex->e_type == ET_EXEC) {
elf_flags |= MAP_FIXED_NOREPLACE;
} else if (elf_ex->e_type == ET_DYN) {
if (interpreter) {
load_bias = ELF_ET_DYN_BASE;
if (current->flags & PF_RANDOMIZE)
load_bias += arch_mmap_rnd();
alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
if (alignment)
load_bias &= ~(alignment - 1);
elf_flags |= MAP_FIXED_NOREPLACE;
} else {
load_bias = 0;
}
load_bias = ELF_PAGESTART(load_bias - vaddr);
total_size = total_mapping_size(elf_phdata,
elf_ex->e_phnum);
if (!total_size) {
retval = -EINVAL;
goto out_free_dentry;
}
}
After everything is ready, establish a mapping between the user space virtual address space and the segments in the target image file through the elf_map function
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
elf_prot, elf_flags, total_size);
7. Load program entry address
if (interpreter) {
elf_entry = load_elf_interp(interp_elf_ex,
interpreter,
load_bias, interp_elf_phdata,
&arch_state);
if (!IS_ERR_VALUE(elf_entry)) {
/*
* load_elf_interp() returns relocation
* adjustment
*/
interp_load_addr = elf_entry;
elf_entry += interp_elf_ex->e_entry;
}
if (BAD_ADDR(elf_entry)) {
retval = IS_ERR_VALUE(elf_entry) ?
(int)elf_entry : -EINVAL;
goto out_free_dentry;
}
reloc_func_desc = interp_load_addr;
allow_write_access(interpreter);
fput(interpreter);
kfree(interp_elf_ex);
kfree(interp_elf_phdata);
} else {
elf_entry = e_entry;
if (BAD_ADDR(elf_entry)) {
retval = -EINVAL;
goto out_free_dentry;
}
}
For programs that require an interpreter, the interpreter image must be loaded first through the load_elf_interp function, and the program entry point must be set to the interpreter's entry address. For files that do not require an interpreter, the entry point virtual address in elf_header can be read directly.
8. Add parameter and environment variable configuration information
retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
e_entry, phdr_addr);
if (retval < 0)
goto out;
mm = current->mm;
mm->end_code = end_code;
mm->start_code = start_code;
mm->start_data = start_data;

评论已关闭