From 3b3d64743ba2a874df9d70cd19e242205b0a788c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 11 Jul 2009 02:30:25 +0000 Subject: [PATCH] mips: replace -mlong-calls with -mno-long-calls to make function calls faster in kernel modules. To achieve this, try to load kernel modules into KSEG0 and, if that doesn't work, use vmalloc and fix up relocations with a jump table. Based on code from a kernel patch by MikroTik. SVN-Revision: 16772 --- .../026-mips_module_reloc.patch | 325 ++++++++++++++++++ .../027-mips_module_reloc.patch | 325 ++++++++++++++++++ 2 files changed, 650 insertions(+) create mode 100644 target/linux/generic-2.6/patches-2.6.28/026-mips_module_reloc.patch create mode 100644 target/linux/generic-2.6/patches-2.6.30/027-mips_module_reloc.patch diff --git a/target/linux/generic-2.6/patches-2.6.28/026-mips_module_reloc.patch b/target/linux/generic-2.6/patches-2.6.28/026-mips_module_reloc.patch new file mode 100644 index 0000000000..49978ee38b --- /dev/null +++ b/target/linux/generic-2.6/patches-2.6.28/026-mips_module_reloc.patch @@ -0,0 +1,325 @@ +--- a/arch/mips/Makefile ++++ b/arch/mips/Makefile +@@ -82,7 +82,7 @@ all-$(CONFIG_BOOT_ELF64) := $(vmlinux-64 + cflags-y += -G 0 -mno-abicalls -fno-pic -pipe + cflags-y += -msoft-float + LDFLAGS_vmlinux += -G 0 -static -n -nostdlib +-MODFLAGS += -mlong-calls ++MODFLAGS += -mno-long-calls + + cflags-y += -ffreestanding + +--- a/arch/mips/include/asm/module.h ++++ b/arch/mips/include/asm/module.h +@@ -9,6 +9,11 @@ struct mod_arch_specific { + struct list_head dbe_list; + const struct exception_table_entry *dbe_start; + const struct exception_table_entry *dbe_end; ++ ++ void *plt_tbl; ++ unsigned int core_plt_offset; ++ unsigned int core_plt_size; ++ unsigned int init_plt_offset; + }; + + typedef uint8_t Elf64_Byte; /* Type for a 8-bit quantity. 
*/ +--- a/arch/mips/kernel/module.c ++++ b/arch/mips/kernel/module.c +@@ -43,6 +43,114 @@ static struct mips_hi16 *mips_hi16_list; + static LIST_HEAD(dbe_list); + static DEFINE_SPINLOCK(dbe_lock); + ++/* ++ * Get the potential max trampolines size required of the init and ++ * non-init sections. Only used if we cannot find enough contiguous ++ * physically mapped memory to put the module into. ++ */ ++static unsigned int ++get_plt_size(const Elf32_Ehdr *hdr, const Elf32_Shdr *sechdrs, ++ const char *secstrings, unsigned int symindex, bool is_init) ++{ ++ unsigned long ret = 0; ++ unsigned int i, j; ++ Elf_Sym *syms; ++ ++ /* Everything marked ALLOC (this includes the exported symbols) */ ++ for (i = 1; i < hdr->e_shnum; ++i) { ++ unsigned int info = sechdrs[i].sh_info; ++ ++ if (sechdrs[i].sh_type != SHT_REL ++ && sechdrs[i].sh_type != SHT_RELA) ++ continue; ++ ++ /* Not a valid relocation section? */ ++ if (info >= hdr->e_shnum) ++ continue; ++ ++ /* Don't bother with non-allocated sections */ ++ if (!(sechdrs[info].sh_flags & SHF_ALLOC)) ++ continue; ++ ++ /* If it's called *.init*, and we're not init, we're ++ not interested */ ++ if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0) ++ != is_init) ++ continue; ++ ++ syms = (Elf_Sym *) sechdrs[symindex].sh_addr; ++ if (sechdrs[i].sh_type == SHT_REL) { ++ Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr; ++ unsigned int size = sechdrs[i].sh_size / sizeof(*rel); ++ ++ for (j = 0; j < size; ++j) { ++ Elf_Sym *sym; ++ ++ if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26) ++ continue; ++ ++ sym = syms + ELF_MIPS_R_SYM(rel[j]); ++ if (!is_init && sym->st_shndx != SHN_UNDEF) ++ continue; ++ ++ ret += 4 * sizeof(int); ++ } ++ } else { ++ Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr; ++ unsigned int size = sechdrs[i].sh_size / sizeof(*rela); ++ ++ for (j = 0; j < size; ++j) { ++ Elf_Sym *sym; ++ ++ if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26) ++ continue; ++ ++ sym = syms + ELF_MIPS_R_SYM(rela[j]); ++ if (!is_init && 
sym->st_shndx != SHN_UNDEF) ++ continue; ++ ++ ret += 4 * sizeof(int); ++ } ++ } ++ } ++ ++ return ret; ++} ++ ++static void *alloc_phys(unsigned long size) ++{ ++ unsigned order; ++ struct page *page; ++ struct page *p; ++ ++ size = PAGE_ALIGN(size); ++ order = get_order(size); ++ ++ page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN | ++ __GFP_THISNODE, order); ++ if (!page) ++ return NULL; ++ ++ split_page(page, order); ++ ++ for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p) ++ __free_page(p); ++ ++ return page_address(page); ++} ++ ++static void free_phys(void *ptr, unsigned long size) ++{ ++ struct page *page; ++ struct page *end; ++ ++ page = virt_to_page(ptr); ++ end = page + (PAGE_ALIGN(size) >> PAGE_SHIFT); ++ ++ for (; page < end; ++page) ++ __free_page(page); ++} ++ + void *module_alloc(unsigned long size) + { + #ifdef MODULE_START +@@ -58,16 +166,41 @@ void *module_alloc(unsigned long size) + + return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL); + #else ++ void *ptr; ++ + if (size == 0) + return NULL; +- return vmalloc(size); ++ ++ ptr = alloc_phys(size); ++ ++ /* If we failed to allocate physically contiguous memory, ++ * fall back to regular vmalloc. The module loader code will ++ * create jump tables to handle long jumps */ ++ if (!ptr) ++ return vmalloc(size); ++ ++ return ptr; + #endif + } + ++static inline bool is_phys_addr(void *ptr) ++{ ++ return (KSEGX(ptr) == KSEG0); ++} ++ + /* Free memory returned from module_alloc */ + void module_free(struct module *mod, void *module_region) + { +- vfree(module_region); ++ if (is_phys_addr(module_region)) { ++ if (mod->module_init == module_region) ++ free_phys(module_region, mod->init_size); ++ else if (mod->module_core == module_region) ++ free_phys(module_region, mod->core_size); ++ else ++ BUG(); ++ } else { ++ vfree(module_region); ++ } + /* FIXME: If module_region == mod->init_region, trim exception + table entries. 
*/ + } +@@ -75,6 +208,24 @@ void module_free(struct module *mod, voi + int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *mod) + { ++ unsigned int symindex = 0; ++ unsigned int core_size, init_size; ++ int i; ++ ++ for (i = 1; i < hdr->e_shnum; i++) ++ if (sechdrs[i].sh_type == SHT_SYMTAB) ++ symindex = i; ++ ++ core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false); ++ init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true); ++ ++ mod->arch.core_plt_offset = 0; ++ mod->arch.core_plt_size = core_size; ++ mod->arch.init_plt_offset = core_size; ++ mod->arch.plt_tbl = kmalloc(core_size + init_size, GFP_KERNEL); ++ if (!mod->arch.plt_tbl) ++ return -ENOMEM; ++ + return 0; + } + +@@ -97,45 +248,73 @@ static int apply_r_mips_32_rela(struct m + return 0; + } + +-static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v) ++static Elf_Addr add_plt_entry_to(unsigned *plt_offset, ++ void *start, unsigned size, Elf_Addr v) + { +- if (v % 4) { +- printk(KERN_ERR "module %s: dangerous relocation\n", me->name); +- return -ENOEXEC; +- } ++ unsigned *tramp = start + *plt_offset; + +- if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) { +- printk(KERN_ERR +- "module %s: relocation overflow\n", +- me->name); +- return -ENOEXEC; +- } ++ if (*plt_offset == size) ++ return 0; ++ ++ *plt_offset += 4 * sizeof(int); ++ ++ /* adjust carry for addiu */ ++ if (v & 0x00008000) ++ v += 0x10000; + +- *location = (*location & ~0x03ffffff) | +- ((*location + (v >> 2)) & 0x03ffffff); ++ tramp[0] = 0x3c190000 | (v >> 16); /* lui t9, hi16 */ ++ tramp[1] = 0x27390000 | (v & 0xffff); /* addiu t9, t9, lo16 */ ++ tramp[2] = 0x03200008; /* jr t9 */ ++ tramp[3] = 0x00000000; /* nop */ ++ ++ return (Elf_Addr) tramp; ++} ++ ++static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v) ++{ ++ if (location >= me->module_core && ++ location < me->module_core + me->core_size) ++ return 
add_plt_entry_to(&me->arch.core_plt_offset, ++ me->module_core, me->core_size, v); ++ ++ if (location >= me->module_init && ++ location < me->module_init + me->init_size) ++ return add_plt_entry_to(&me->arch.init_plt_offset, ++ me->module_init, me->init_size, v); + + return 0; + } + +-static int apply_r_mips_26_rela(struct module *me, u32 *location, Elf_Addr v) ++static int set_r_mips_26(struct module *me, u32 *location, u32 ofs, Elf_Addr v) + { + if (v % 4) { + printk(KERN_ERR "module %s: dangerous relocation\n", me->name); + return -ENOEXEC; + } + +- if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) { ++ if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000) && ++ ((v = add_plt_entry(me, location, v + (ofs << 2))) == 0)) { + printk(KERN_ERR + "module %s: relocation overflow\n", + me->name); + return -ENOEXEC; + } + +- *location = (*location & ~0x03ffffff) | ((v >> 2) & 0x03ffffff); ++ *location = (*location & ~0x03ffffff) | ((ofs + (v >> 2)) & 0x03ffffff); + + return 0; + } + ++static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v) ++{ ++ return set_r_mips_26(me, location, *location & 0x03ffffff, v); ++} ++ ++static int apply_r_mips_26_rela(struct module *me, u32 *location, Elf_Addr v) ++{ ++ return set_r_mips_26(me, location, 0, v); ++} ++ + static int apply_r_mips_hi16_rel(struct module *me, u32 *location, Elf_Addr v) + { + struct mips_hi16 *n; +@@ -400,11 +579,23 @@ int module_finalize(const Elf_Ehdr *hdr, + list_add(&me->arch.dbe_list, &dbe_list); + spin_unlock_irq(&dbe_lock); + } ++ ++ /* Get rid of the fixup trampoline if we're running the module ++ * from physically mapped address space */ ++ if (me->arch.core_plt_offset == 0 && ++ me->arch.init_plt_offset == me->arch.core_plt_size && ++ is_phys_addr(me->module_core)) { ++ kfree(me->arch.plt_tbl); ++ me->arch.plt_tbl = NULL; ++ } ++ + return 0; + } + + void module_arch_cleanup(struct module *mod) + { ++ if (mod->arch.plt_tbl) ++ 
kfree(mod->arch.plt_tbl); + spin_lock_irq(&dbe_lock); + list_del(&mod->arch.dbe_list); + spin_unlock_irq(&dbe_lock); diff --git a/target/linux/generic-2.6/patches-2.6.30/027-mips_module_reloc.patch b/target/linux/generic-2.6/patches-2.6.30/027-mips_module_reloc.patch new file mode 100644 index 0000000000..912857b0ee --- /dev/null +++ b/target/linux/generic-2.6/patches-2.6.30/027-mips_module_reloc.patch @@ -0,0 +1,325 @@ +--- a/arch/mips/Makefile ++++ b/arch/mips/Makefile +@@ -83,7 +83,7 @@ all-$(CONFIG_BOOT_ELF64) := $(vmlinux-64 + cflags-y += -G 0 -mno-abicalls -fno-pic -pipe + cflags-y += -msoft-float + LDFLAGS_vmlinux += -G 0 -static -n -nostdlib +-MODFLAGS += -mlong-calls ++MODFLAGS += -mno-long-calls + + cflags-y += -ffreestanding + +--- a/arch/mips/include/asm/module.h ++++ b/arch/mips/include/asm/module.h +@@ -9,6 +9,11 @@ struct mod_arch_specific { + struct list_head dbe_list; + const struct exception_table_entry *dbe_start; + const struct exception_table_entry *dbe_end; ++ ++ void *plt_tbl; ++ unsigned int core_plt_offset; ++ unsigned int core_plt_size; ++ unsigned int init_plt_offset; + }; + + typedef uint8_t Elf64_Byte; /* Type for a 8-bit quantity. */ +--- a/arch/mips/kernel/module.c ++++ b/arch/mips/kernel/module.c +@@ -43,6 +43,114 @@ static struct mips_hi16 *mips_hi16_list; + static LIST_HEAD(dbe_list); + static DEFINE_SPINLOCK(dbe_lock); + ++/* ++ * Get the potential max trampolines size required of the init and ++ * non-init sections. Only used if we cannot find enough contiguous ++ * physically mapped memory to put the module into. 
++ */ ++static unsigned int ++get_plt_size(const Elf32_Ehdr *hdr, const Elf32_Shdr *sechdrs, ++ const char *secstrings, unsigned int symindex, bool is_init) ++{ ++ unsigned long ret = 0; ++ unsigned int i, j; ++ Elf_Sym *syms; ++ ++ /* Everything marked ALLOC (this includes the exported symbols) */ ++ for (i = 1; i < hdr->e_shnum; ++i) { ++ unsigned int info = sechdrs[i].sh_info; ++ ++ if (sechdrs[i].sh_type != SHT_REL ++ && sechdrs[i].sh_type != SHT_RELA) ++ continue; ++ ++ /* Not a valid relocation section? */ ++ if (info >= hdr->e_shnum) ++ continue; ++ ++ /* Don't bother with non-allocated sections */ ++ if (!(sechdrs[info].sh_flags & SHF_ALLOC)) ++ continue; ++ ++ /* If it's called *.init*, and we're not init, we're ++ not interested */ ++ if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0) ++ != is_init) ++ continue; ++ ++ syms = (Elf_Sym *) sechdrs[symindex].sh_addr; ++ if (sechdrs[i].sh_type == SHT_REL) { ++ Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr; ++ unsigned int size = sechdrs[i].sh_size / sizeof(*rel); ++ ++ for (j = 0; j < size; ++j) { ++ Elf_Sym *sym; ++ ++ if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26) ++ continue; ++ ++ sym = syms + ELF_MIPS_R_SYM(rel[j]); ++ if (!is_init && sym->st_shndx != SHN_UNDEF) ++ continue; ++ ++ ret += 4 * sizeof(int); ++ } ++ } else { ++ Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr; ++ unsigned int size = sechdrs[i].sh_size / sizeof(*rela); ++ ++ for (j = 0; j < size; ++j) { ++ Elf_Sym *sym; ++ ++ if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26) ++ continue; ++ ++ sym = syms + ELF_MIPS_R_SYM(rela[j]); ++ if (!is_init && sym->st_shndx != SHN_UNDEF) ++ continue; ++ ++ ret += 4 * sizeof(int); ++ } ++ } ++ } ++ ++ return ret; ++} ++ ++static void *alloc_phys(unsigned long size) ++{ ++ unsigned order; ++ struct page *page; ++ struct page *p; ++ ++ size = PAGE_ALIGN(size); ++ order = get_order(size); ++ ++ page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN | ++ __GFP_THISNODE, order); ++ if (!page) ++ return 
NULL; ++ ++ split_page(page, order); ++ ++ for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p) ++ __free_page(p); ++ ++ return page_address(page); ++} ++ ++static void free_phys(void *ptr, unsigned long size) ++{ ++ struct page *page; ++ struct page *end; ++ ++ page = virt_to_page(ptr); ++ end = page + (PAGE_ALIGN(size) >> PAGE_SHIFT); ++ ++ for (; page < end; ++page) ++ __free_page(page); ++} ++ + void *module_alloc(unsigned long size) + { + #ifdef MODULE_START +@@ -58,16 +166,41 @@ void *module_alloc(unsigned long size) + + return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL); + #else ++ void *ptr; ++ + if (size == 0) + return NULL; +- return vmalloc(size); ++ ++ ptr = alloc_phys(size); ++ ++ /* If we failed to allocate physically contiguous memory, ++ * fall back to regular vmalloc. The module loader code will ++ * create jump tables to handle long jumps */ ++ if (!ptr) ++ return vmalloc(size); ++ ++ return ptr; + #endif + } + ++static inline bool is_phys_addr(void *ptr) ++{ ++ return (KSEGX(ptr) == KSEG0); ++} ++ + /* Free memory returned from module_alloc */ + void module_free(struct module *mod, void *module_region) + { +- vfree(module_region); ++ if (is_phys_addr(module_region)) { ++ if (mod->module_init == module_region) ++ free_phys(module_region, mod->init_size); ++ else if (mod->module_core == module_region) ++ free_phys(module_region, mod->core_size); ++ else ++ BUG(); ++ } else { ++ vfree(module_region); ++ } + /* FIXME: If module_region == mod->init_region, trim exception + table entries. 
*/ + } +@@ -75,6 +208,24 @@ void module_free(struct module *mod, voi + int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *mod) + { ++ unsigned int symindex = 0; ++ unsigned int core_size, init_size; ++ int i; ++ ++ for (i = 1; i < hdr->e_shnum; i++) ++ if (sechdrs[i].sh_type == SHT_SYMTAB) ++ symindex = i; ++ ++ core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false); ++ init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true); ++ ++ mod->arch.core_plt_offset = 0; ++ mod->arch.core_plt_size = core_size; ++ mod->arch.init_plt_offset = core_size; ++ mod->arch.plt_tbl = kmalloc(core_size + init_size, GFP_KERNEL); ++ if (!mod->arch.plt_tbl) ++ return -ENOMEM; ++ + return 0; + } + +@@ -97,45 +248,73 @@ static int apply_r_mips_32_rela(struct m + return 0; + } + +-static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v) ++static Elf_Addr add_plt_entry_to(unsigned *plt_offset, ++ void *start, unsigned size, Elf_Addr v) + { +- if (v % 4) { +- printk(KERN_ERR "module %s: dangerous relocation\n", me->name); +- return -ENOEXEC; +- } ++ unsigned *tramp = start + *plt_offset; + +- if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) { +- printk(KERN_ERR +- "module %s: relocation overflow\n", +- me->name); +- return -ENOEXEC; +- } ++ if (*plt_offset == size) ++ return 0; ++ ++ *plt_offset += 4 * sizeof(int); ++ ++ /* adjust carry for addiu */ ++ if (v & 0x00008000) ++ v += 0x10000; + +- *location = (*location & ~0x03ffffff) | +- ((*location + (v >> 2)) & 0x03ffffff); ++ tramp[0] = 0x3c190000 | (v >> 16); /* lui t9, hi16 */ ++ tramp[1] = 0x27390000 | (v & 0xffff); /* addiu t9, t9, lo16 */ ++ tramp[2] = 0x03200008; /* jr t9 */ ++ tramp[3] = 0x00000000; /* nop */ ++ ++ return (Elf_Addr) tramp; ++} ++ ++static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v) ++{ ++ if (location >= me->module_core && ++ location < me->module_core + me->core_size) ++ return 
add_plt_entry_to(&me->arch.core_plt_offset, ++ me->module_core, me->core_size, v); ++ ++ if (location >= me->module_init && ++ location < me->module_init + me->init_size) ++ return add_plt_entry_to(&me->arch.init_plt_offset, ++ me->module_init, me->init_size, v); + + return 0; + } + +-static int apply_r_mips_26_rela(struct module *me, u32 *location, Elf_Addr v) ++static int set_r_mips_26(struct module *me, u32 *location, u32 ofs, Elf_Addr v) + { + if (v % 4) { + printk(KERN_ERR "module %s: dangerous relocation\n", me->name); + return -ENOEXEC; + } + +- if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) { ++ if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000) && ++ ((v = add_plt_entry(me, location, v + (ofs << 2))) == 0)) { + printk(KERN_ERR + "module %s: relocation overflow\n", + me->name); + return -ENOEXEC; + } + +- *location = (*location & ~0x03ffffff) | ((v >> 2) & 0x03ffffff); ++ *location = (*location & ~0x03ffffff) | ((ofs + (v >> 2)) & 0x03ffffff); + + return 0; + } + ++static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v) ++{ ++ return set_r_mips_26(me, location, *location & 0x03ffffff, v); ++} ++ ++static int apply_r_mips_26_rela(struct module *me, u32 *location, Elf_Addr v) ++{ ++ return set_r_mips_26(me, location, 0, v); ++} ++ + static int apply_r_mips_hi16_rel(struct module *me, u32 *location, Elf_Addr v) + { + struct mips_hi16 *n; +@@ -400,11 +579,23 @@ int module_finalize(const Elf_Ehdr *hdr, + list_add(&me->arch.dbe_list, &dbe_list); + spin_unlock_irq(&dbe_lock); + } ++ ++ /* Get rid of the fixup trampoline if we're running the module ++ * from physically mapped address space */ ++ if (me->arch.core_plt_offset == 0 && ++ me->arch.init_plt_offset == me->arch.core_plt_size && ++ is_phys_addr(me->module_core)) { ++ kfree(me->arch.plt_tbl); ++ me->arch.plt_tbl = NULL; ++ } ++ + return 0; + } + + void module_arch_cleanup(struct module *mod) + { ++ if (mod->arch.plt_tbl) ++ 
kfree(mod->arch.plt_tbl); + spin_lock_irq(&dbe_lock); + list_del(&mod->arch.dbe_list); + spin_unlock_irq(&dbe_lock);