You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1543 lines
49 KiB
1543 lines
49 KiB
From ebd731dd71ec9728a5a87ec1cd695be15828c32c Mon Sep 17 00:00:00 2001
|
|
From: popcornmix <popcornmix@gmail.com>
|
|
Date: Mon, 28 Nov 2016 16:50:04 +0000
|
|
Subject: [PATCH] Improve __copy_to_user and __copy_from_user performance
|
|
|
|
Provide a __copy_from_user that uses memcpy. On BCM2708, use
|
|
optimised memcpy/memmove/memcmp/memset implementations.
|
|
|
|
arch/arm: Add mmiocpy/set aliases for memcpy/set
|
|
|
|
See: https://github.com/raspberrypi/linux/issues/1082
|
|
|
|
copy_from_user: CPU_SW_DOMAIN_PAN compatibility
|
|
|
|
The downstream copy_from_user acceleration must also play nice with
|
|
CONFIG_CPU_SW_DOMAIN_PAN.
|
|
|
|
See: https://github.com/raspberrypi/linux/issues/1381
|
|
|
|
Signed-off-by: Phil Elwell <phil@raspberrypi.org>
|
|
---
|
|
arch/arm/include/asm/string.h | 5 +
|
|
arch/arm/include/asm/uaccess.h | 3 +
|
|
arch/arm/lib/Makefile | 15 +-
|
|
arch/arm/lib/arm-mem.h | 159 ++++++++++++
|
|
arch/arm/lib/copy_from_user.S | 4 +-
|
|
arch/arm/lib/exports_rpi.c | 37 +++
|
|
arch/arm/lib/memcmp_rpi.S | 285 +++++++++++++++++++++
|
|
arch/arm/lib/memcpy_rpi.S | 61 +++++
|
|
arch/arm/lib/memcpymove.h | 506 +++++++++++++++++++++++++++++++++++++
|
|
arch/arm/lib/memmove_rpi.S | 61 +++++
|
|
arch/arm/lib/memset_rpi.S | 123 +++++++++
|
|
arch/arm/lib/uaccess_with_memcpy.c | 120 ++++++++-
|
|
arch/arm/mach-bcm/Kconfig | 7 +
|
|
13 files changed, 1380 insertions(+), 6 deletions(-)
|
|
create mode 100644 arch/arm/lib/arm-mem.h
|
|
create mode 100644 arch/arm/lib/exports_rpi.c
|
|
create mode 100644 arch/arm/lib/memcmp_rpi.S
|
|
create mode 100644 arch/arm/lib/memcpy_rpi.S
|
|
create mode 100644 arch/arm/lib/memcpymove.h
|
|
create mode 100644 arch/arm/lib/memmove_rpi.S
|
|
create mode 100644 arch/arm/lib/memset_rpi.S
|
|
|
|
--- a/arch/arm/include/asm/string.h
|
|
+++ b/arch/arm/include/asm/string.h
|
|
@@ -24,6 +24,11 @@ extern void * memchr(const void *, int,
|
|
#define __HAVE_ARCH_MEMSET
|
|
extern void * memset(void *, int, __kernel_size_t);
|
|
|
|
+#ifdef CONFIG_BCM2835_FAST_MEMCPY
|
|
+#define __HAVE_ARCH_MEMCMP
|
|
+extern int memcmp(const void *, const void *, size_t);
|
|
+#endif
|
|
+
|
|
extern void __memzero(void *ptr, __kernel_size_t n);
|
|
|
|
#define memset(p,v,n) \
|
|
--- a/arch/arm/include/asm/uaccess.h
|
|
+++ b/arch/arm/include/asm/uaccess.h
|
|
@@ -477,6 +477,9 @@ do { \
|
|
extern unsigned long __must_check
|
|
arm_copy_from_user(void *to, const void __user *from, unsigned long n);
|
|
|
|
+extern unsigned long __must_check
|
|
+__copy_from_user_std(void *to, const void __user *from, unsigned long n);
|
|
+
|
|
static inline unsigned long __must_check
|
|
__arch_copy_from_user(void *to, const void __user *from, unsigned long n)
|
|
{
|
|
--- a/arch/arm/lib/Makefile
|
|
+++ b/arch/arm/lib/Makefile
|
|
@@ -6,9 +6,8 @@
|
|
|
|
lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
|
|
csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
|
|
- delay.o delay-loop.o findbit.o memchr.o memcpy.o \
|
|
- memmove.o memset.o memzero.o setbit.o \
|
|
- strchr.o strrchr.o \
|
|
+ delay.o delay-loop.o findbit.o memchr.o memzero.o \
|
|
+ setbit.o strchr.o strrchr.o \
|
|
testchangebit.o testclearbit.o testsetbit.o \
|
|
ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
|
|
ucmpdi2.o lib1funcs.o div64.o \
|
|
@@ -18,6 +17,16 @@ lib-y := backtrace.o changebit.o csumip
|
|
mmu-y := clear_user.o copy_page.o getuser.o putuser.o \
|
|
copy_from_user.o copy_to_user.o
|
|
|
|
+# Choose optimised implementations for Raspberry Pi
|
|
+ifeq ($(CONFIG_BCM2835_FAST_MEMCPY),y)
|
|
+ CFLAGS_uaccess_with_memcpy.o += -DCOPY_FROM_USER_THRESHOLD=1600
|
|
+ CFLAGS_uaccess_with_memcpy.o += -DCOPY_TO_USER_THRESHOLD=672
|
|
+ obj-$(CONFIG_MODULES) += exports_rpi.o
|
|
+ lib-y += memcpy_rpi.o memmove_rpi.o memset_rpi.o memcmp_rpi.o
|
|
+else
|
|
+ lib-y += memcpy.o memmove.o memset.o
|
|
+endif
|
|
+
|
|
# using lib_ here won't override already available weak symbols
|
|
obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o
|
|
|
|
--- /dev/null
|
|
+++ b/arch/arm/lib/arm-mem.h
|
|
@@ -0,0 +1,159 @@
|
|
+/*
|
|
+Copyright (c) 2013, Raspberry Pi Foundation
|
|
+Copyright (c) 2013, RISC OS Open Ltd
|
|
+All rights reserved.
|
|
+
|
|
+Redistribution and use in source and binary forms, with or without
|
|
+modification, are permitted provided that the following conditions are met:
|
|
+ * Redistributions of source code must retain the above copyright
|
|
+ notice, this list of conditions and the following disclaimer.
|
|
+ * Redistributions in binary form must reproduce the above copyright
|
|
+ notice, this list of conditions and the following disclaimer in the
|
|
+ documentation and/or other materials provided with the distribution.
|
|
+ * Neither the name of the copyright holder nor the
|
|
+ names of its contributors may be used to endorse or promote products
|
|
+ derived from this software without specific prior written permission.
|
|
+
|
|
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
+*/
|
|
+
|
|
+.macro myfunc fname
|
|
+ .func fname
|
|
+ .global fname
|
|
+fname:
|
|
+.endm
|
|
+
|
|
+.macro preload_leading_step1 backwards, ptr, base
|
|
+/* If the destination is already 16-byte aligned, then we need to preload
|
|
+ * between 0 and prefetch_distance (inclusive) cache lines ahead so there
|
|
+ * are no gaps when the inner loop starts.
|
|
+ */
|
|
+ .if backwards
|
|
+ sub ptr, base, #1
|
|
+ bic ptr, ptr, #31
|
|
+ .else
|
|
+ bic ptr, base, #31
|
|
+ .endif
|
|
+ .set OFFSET, 0
|
|
+ .rept prefetch_distance+1
|
|
+ pld [ptr, #OFFSET]
|
|
+ .if backwards
|
|
+ .set OFFSET, OFFSET-32
|
|
+ .else
|
|
+ .set OFFSET, OFFSET+32
|
|
+ .endif
|
|
+ .endr
|
|
+.endm
|
|
+
|
|
+.macro preload_leading_step2 backwards, ptr, base, leading_bytes, tmp
|
|
+/* However, if the destination is not 16-byte aligned, we may need to
|
|
+ * preload one more cache line than that. The question we need to ask is:
|
|
+ * are the leading bytes more than the amount by which the source
|
|
+ * pointer will be rounded down for preloading, and if so, by how many
|
|
+ * cache lines?
|
|
+ */
|
|
+ .if backwards
|
|
+/* Here we compare against how many bytes we are into the
|
|
+ * cache line, counting down from the highest such address.
|
|
+ * Effectively, we want to calculate
|
|
+ * leading_bytes = dst&15
|
|
+ * cacheline_offset = 31-((src-leading_bytes-1)&31)
|
|
+ * extra_needed = leading_bytes - cacheline_offset
|
|
+ * and test if extra_needed is <= 0, or rearranging:
|
|
+ * leading_bytes + (src-leading_bytes-1)&31 <= 31
|
|
+ */
|
|
+ mov tmp, base, lsl #32-5
|
|
+ sbc tmp, tmp, leading_bytes, lsl #32-5
|
|
+ adds tmp, tmp, leading_bytes, lsl #32-5
|
|
+ bcc 61f
|
|
+ pld [ptr, #-32*(prefetch_distance+1)]
|
|
+ .else
|
|
+/* Effectively, we want to calculate
|
|
+ * leading_bytes = (-dst)&15
|
|
+ * cacheline_offset = (src+leading_bytes)&31
|
|
+ * extra_needed = leading_bytes - cacheline_offset
|
|
+ * and test if extra_needed is <= 0.
|
|
+ */
|
|
+ mov tmp, base, lsl #32-5
|
|
+ add tmp, tmp, leading_bytes, lsl #32-5
|
|
+ rsbs tmp, tmp, leading_bytes, lsl #32-5
|
|
+ bls 61f
|
|
+ pld [ptr, #32*(prefetch_distance+1)]
|
|
+ .endif
|
|
+61:
|
|
+.endm
|
|
+
|
|
+.macro preload_trailing backwards, base, remain, tmp
|
|
+ /* We need either 0, 1 or 2 extra preloads */
|
|
+ .if backwards
|
|
+ rsb tmp, base, #0
|
|
+ mov tmp, tmp, lsl #32-5
|
|
+ .else
|
|
+ mov tmp, base, lsl #32-5
|
|
+ .endif
|
|
+ adds tmp, tmp, remain, lsl #32-5
|
|
+ adceqs tmp, tmp, #0
|
|
+ /* The instruction above has two effects: ensures Z is only
|
|
+ * set if C was clear (so Z indicates that both shifted quantities
|
|
+ * were 0), and clears C if Z was set (so C indicates that the sum
|
|
+ * of the shifted quantities was greater and not equal to 32) */
|
|
+ beq 82f
|
|
+ .if backwards
|
|
+ sub tmp, base, #1
|
|
+ bic tmp, tmp, #31
|
|
+ .else
|
|
+ bic tmp, base, #31
|
|
+ .endif
|
|
+ bcc 81f
|
|
+ .if backwards
|
|
+ pld [tmp, #-32*(prefetch_distance+1)]
|
|
+81:
|
|
+ pld [tmp, #-32*prefetch_distance]
|
|
+ .else
|
|
+ pld [tmp, #32*(prefetch_distance+2)]
|
|
+81:
|
|
+ pld [tmp, #32*(prefetch_distance+1)]
|
|
+ .endif
|
|
+82:
|
|
+.endm
|
|
+
|
|
+.macro preload_all backwards, narrow_case, shift, base, remain, tmp0, tmp1
|
|
+ .if backwards
|
|
+ sub tmp0, base, #1
|
|
+ bic tmp0, tmp0, #31
|
|
+ pld [tmp0]
|
|
+ sub tmp1, base, remain, lsl #shift
|
|
+ .else
|
|
+ bic tmp0, base, #31
|
|
+ pld [tmp0]
|
|
+ add tmp1, base, remain, lsl #shift
|
|
+ sub tmp1, tmp1, #1
|
|
+ .endif
|
|
+ bic tmp1, tmp1, #31
|
|
+ cmp tmp1, tmp0
|
|
+ beq 92f
|
|
+ .if narrow_case
|
|
+ /* In this case, all the data fits in either 1 or 2 cache lines */
|
|
+ pld [tmp1]
|
|
+ .else
|
|
+91:
|
|
+ .if backwards
|
|
+ sub tmp0, tmp0, #32
|
|
+ .else
|
|
+ add tmp0, tmp0, #32
|
|
+ .endif
|
|
+ cmp tmp0, tmp1
|
|
+ pld [tmp0]
|
|
+ bne 91b
|
|
+ .endif
|
|
+92:
|
|
+.endm
|
|
--- a/arch/arm/lib/copy_from_user.S
|
|
+++ b/arch/arm/lib/copy_from_user.S
|
|
@@ -89,11 +89,13 @@
|
|
|
|
.text
|
|
|
|
-ENTRY(arm_copy_from_user)
|
|
+ENTRY(__copy_from_user_std)
|
|
+WEAK(arm_copy_from_user)
|
|
|
|
#include "copy_template.S"
|
|
|
|
ENDPROC(arm_copy_from_user)
|
|
+ENDPROC(__copy_from_user_std)
|
|
|
|
.pushsection .fixup,"ax"
|
|
.align 0
|
|
--- /dev/null
|
|
+++ b/arch/arm/lib/exports_rpi.c
|
|
@@ -0,0 +1,37 @@
|
|
+/**
|
|
+ * Copyright (c) 2014, Raspberry Pi (Trading) Ltd.
|
|
+ *
|
|
+ * Redistribution and use in source and binary forms, with or without
|
|
+ * modification, are permitted provided that the following conditions
|
|
+ * are met:
|
|
+ * 1. Redistributions of source code must retain the above copyright
|
|
+ * notice, this list of conditions, and the following disclaimer,
|
|
+ * without modification.
|
|
+ * 2. Redistributions in binary form must reproduce the above copyright
|
|
+ * notice, this list of conditions and the following disclaimer in the
|
|
+ * documentation and/or other materials provided with the distribution.
|
|
+ * 3. The names of the above-listed copyright holders may not be used
|
|
+ * to endorse or promote products derived from this software without
|
|
+ * specific prior written permission.
|
|
+ *
|
|
+ * ALTERNATIVELY, this software may be distributed under the terms of the
|
|
+ * GNU General Public License ("GPL") version 2, as published by the Free
|
|
+ * Software Foundation.
|
|
+ *
|
|
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
|
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
|
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
|
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
+ */
|
|
+
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/module.h>
|
|
+
|
|
+EXPORT_SYMBOL(memcmp);
|
|
--- /dev/null
|
|
+++ b/arch/arm/lib/memcmp_rpi.S
|
|
@@ -0,0 +1,285 @@
|
|
+/*
|
|
+Copyright (c) 2013, Raspberry Pi Foundation
|
|
+Copyright (c) 2013, RISC OS Open Ltd
|
|
+All rights reserved.
|
|
+
|
|
+Redistribution and use in source and binary forms, with or without
|
|
+modification, are permitted provided that the following conditions are met:
|
|
+ * Redistributions of source code must retain the above copyright
|
|
+ notice, this list of conditions and the following disclaimer.
|
|
+ * Redistributions in binary form must reproduce the above copyright
|
|
+ notice, this list of conditions and the following disclaimer in the
|
|
+ documentation and/or other materials provided with the distribution.
|
|
+ * Neither the name of the copyright holder nor the
|
|
+ names of its contributors may be used to endorse or promote products
|
|
+ derived from this software without specific prior written permission.
|
|
+
|
|
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
+*/
|
|
+
|
|
+#include <linux/linkage.h>
|
|
+#include "arm-mem.h"
|
|
+
|
|
+/* Prevent the stack from becoming executable */
|
|
+#if defined(__linux__) && defined(__ELF__)
|
|
+.section .note.GNU-stack,"",%progbits
|
|
+#endif
|
|
+
|
|
+ .text
|
|
+ .arch armv6
|
|
+ .object_arch armv4
|
|
+ .arm
|
|
+ .altmacro
|
|
+ .p2align 2
|
|
+
|
|
+.macro memcmp_process_head unaligned
|
|
+ .if unaligned
|
|
+ ldr DAT0, [S_1], #4
|
|
+ ldr DAT1, [S_1], #4
|
|
+ ldr DAT2, [S_1], #4
|
|
+ ldr DAT3, [S_1], #4
|
|
+ .else
|
|
+ ldmia S_1!, {DAT0, DAT1, DAT2, DAT3}
|
|
+ .endif
|
|
+ ldmia S_2!, {DAT4, DAT5, DAT6, DAT7}
|
|
+.endm
|
|
+
|
|
+.macro memcmp_process_tail
|
|
+ cmp DAT0, DAT4
|
|
+ cmpeq DAT1, DAT5
|
|
+ cmpeq DAT2, DAT6
|
|
+ cmpeq DAT3, DAT7
|
|
+ bne 200f
|
|
+.endm
|
|
+
|
|
+.macro memcmp_leading_31bytes
|
|
+ movs DAT0, OFF, lsl #31
|
|
+ ldrmib DAT0, [S_1], #1
|
|
+ ldrcsh DAT1, [S_1], #2
|
|
+ ldrmib DAT4, [S_2], #1
|
|
+ ldrcsh DAT5, [S_2], #2
|
|
+ movpl DAT0, #0
|
|
+ movcc DAT1, #0
|
|
+ movpl DAT4, #0
|
|
+ movcc DAT5, #0
|
|
+ submi N, N, #1
|
|
+ subcs N, N, #2
|
|
+ cmp DAT0, DAT4
|
|
+ cmpeq DAT1, DAT5
|
|
+ bne 200f
|
|
+ movs DAT0, OFF, lsl #29
|
|
+ ldrmi DAT0, [S_1], #4
|
|
+ ldrcs DAT1, [S_1], #4
|
|
+ ldrcs DAT2, [S_1], #4
|
|
+ ldrmi DAT4, [S_2], #4
|
|
+ ldmcsia S_2!, {DAT5, DAT6}
|
|
+ movpl DAT0, #0
|
|
+ movcc DAT1, #0
|
|
+ movcc DAT2, #0
|
|
+ movpl DAT4, #0
|
|
+ movcc DAT5, #0
|
|
+ movcc DAT6, #0
|
|
+ submi N, N, #4
|
|
+ subcs N, N, #8
|
|
+ cmp DAT0, DAT4
|
|
+ cmpeq DAT1, DAT5
|
|
+ cmpeq DAT2, DAT6
|
|
+ bne 200f
|
|
+ tst OFF, #16
|
|
+ beq 105f
|
|
+ memcmp_process_head 1
|
|
+ sub N, N, #16
|
|
+ memcmp_process_tail
|
|
+105:
|
|
+.endm
|
|
+
|
|
+.macro memcmp_trailing_15bytes unaligned
|
|
+ movs N, N, lsl #29
|
|
+ .if unaligned
|
|
+ ldrcs DAT0, [S_1], #4
|
|
+ ldrcs DAT1, [S_1], #4
|
|
+ .else
|
|
+ ldmcsia S_1!, {DAT0, DAT1}
|
|
+ .endif
|
|
+ ldrmi DAT2, [S_1], #4
|
|
+ ldmcsia S_2!, {DAT4, DAT5}
|
|
+ ldrmi DAT6, [S_2], #4
|
|
+ movcc DAT0, #0
|
|
+ movcc DAT1, #0
|
|
+ movpl DAT2, #0
|
|
+ movcc DAT4, #0
|
|
+ movcc DAT5, #0
|
|
+ movpl DAT6, #0
|
|
+ cmp DAT0, DAT4
|
|
+ cmpeq DAT1, DAT5
|
|
+ cmpeq DAT2, DAT6
|
|
+ bne 200f
|
|
+ movs N, N, lsl #2
|
|
+ ldrcsh DAT0, [S_1], #2
|
|
+ ldrmib DAT1, [S_1]
|
|
+ ldrcsh DAT4, [S_2], #2
|
|
+ ldrmib DAT5, [S_2]
|
|
+ movcc DAT0, #0
|
|
+ movpl DAT1, #0
|
|
+ movcc DAT4, #0
|
|
+ movpl DAT5, #0
|
|
+ cmp DAT0, DAT4
|
|
+ cmpeq DAT1, DAT5
|
|
+ bne 200f
|
|
+.endm
|
|
+
|
|
+.macro memcmp_long_inner_loop unaligned
|
|
+110:
|
|
+ memcmp_process_head unaligned
|
|
+ pld [S_2, #prefetch_distance*32 + 16]
|
|
+ memcmp_process_tail
|
|
+ memcmp_process_head unaligned
|
|
+ pld [S_1, OFF]
|
|
+ memcmp_process_tail
|
|
+ subs N, N, #32
|
|
+ bhs 110b
|
|
+ /* Just before the final (prefetch_distance+1) 32-byte blocks,
|
|
+ * deal with final preloads */
|
|
+ preload_trailing 0, S_1, N, DAT0
|
|
+ preload_trailing 0, S_2, N, DAT0
|
|
+ add N, N, #(prefetch_distance+2)*32 - 16
|
|
+120:
|
|
+ memcmp_process_head unaligned
|
|
+ memcmp_process_tail
|
|
+ subs N, N, #16
|
|
+ bhs 120b
|
|
+ /* Trailing words and bytes */
|
|
+ tst N, #15
|
|
+ beq 199f
|
|
+ memcmp_trailing_15bytes unaligned
|
|
+199: /* Reached end without detecting a difference */
|
|
+ mov a1, #0
|
|
+ setend le
|
|
+ pop {DAT1-DAT6, pc}
|
|
+.endm
|
|
+
|
|
+.macro memcmp_short_inner_loop unaligned
|
|
+ subs N, N, #16 /* simplifies inner loop termination */
|
|
+ blo 122f
|
|
+120:
|
|
+ memcmp_process_head unaligned
|
|
+ memcmp_process_tail
|
|
+ subs N, N, #16
|
|
+ bhs 120b
|
|
+122: /* Trailing words and bytes */
|
|
+ tst N, #15
|
|
+ beq 199f
|
|
+ memcmp_trailing_15bytes unaligned
|
|
+199: /* Reached end without detecting a difference */
|
|
+ mov a1, #0
|
|
+ setend le
|
|
+ pop {DAT1-DAT6, pc}
|
|
+.endm
|
|
+
|
|
+/*
|
|
+ * int memcmp(const void *s1, const void *s2, size_t n);
|
|
+ * On entry:
|
|
+ * a1 = pointer to buffer 1
|
|
+ * a2 = pointer to buffer 2
|
|
+ * a3 = number of bytes to compare (as unsigned chars)
|
|
+ * On exit:
|
|
+ * a1 = >0/=0/<0 if s1 >/=/< s2
|
|
+ */
|
|
+
|
|
+.set prefetch_distance, 2
|
|
+
|
|
+ENTRY(memcmp)
|
|
+ S_1 .req a1
|
|
+ S_2 .req a2
|
|
+ N .req a3
|
|
+ DAT0 .req a4
|
|
+ DAT1 .req v1
|
|
+ DAT2 .req v2
|
|
+ DAT3 .req v3
|
|
+ DAT4 .req v4
|
|
+ DAT5 .req v5
|
|
+ DAT6 .req v6
|
|
+ DAT7 .req ip
|
|
+ OFF .req lr
|
|
+
|
|
+ push {DAT1-DAT6, lr}
|
|
+ setend be /* lowest-addressed bytes are most significant */
|
|
+
|
|
+ /* To preload ahead as we go, we need at least (prefetch_distance+2) 32-byte blocks */
|
|
+ cmp N, #(prefetch_distance+3)*32 - 1
|
|
+ blo 170f
|
|
+
|
|
+ /* Long case */
|
|
+ /* Adjust N so that the decrement instruction can also test for
|
|
+ * inner loop termination. We want it to stop when there are
|
|
+ * (prefetch_distance+1) complete blocks to go. */
|
|
+ sub N, N, #(prefetch_distance+2)*32
|
|
+ preload_leading_step1 0, DAT0, S_1
|
|
+ preload_leading_step1 0, DAT1, S_2
|
|
+ tst S_2, #31
|
|
+ beq 154f
|
|
+ rsb OFF, S_2, #0 /* no need to AND with 15 here */
|
|
+ preload_leading_step2 0, DAT0, S_1, OFF, DAT2
|
|
+ preload_leading_step2 0, DAT1, S_2, OFF, DAT2
|
|
+ memcmp_leading_31bytes
|
|
+154: /* Second source now cacheline (32-byte) aligned; we have at
|
|
+ * least one prefetch to go. */
|
|
+ /* Prefetch offset is best selected such that it lies in the
|
|
+ * first 8 of each 32 bytes - but it's just as easy to aim for
|
|
+ * the first one */
|
|
+ and OFF, S_1, #31
|
|
+ rsb OFF, OFF, #32*prefetch_distance
|
|
+ tst S_1, #3
|
|
+ bne 140f
|
|
+ memcmp_long_inner_loop 0
|
|
+140: memcmp_long_inner_loop 1
|
|
+
|
|
+170: /* Short case */
|
|
+ teq N, #0
|
|
+ beq 199f
|
|
+ preload_all 0, 0, 0, S_1, N, DAT0, DAT1
|
|
+ preload_all 0, 0, 0, S_2, N, DAT0, DAT1
|
|
+ tst S_2, #3
|
|
+ beq 174f
|
|
+172: subs N, N, #1
|
|
+ blo 199f
|
|
+ ldrb DAT0, [S_1], #1
|
|
+ ldrb DAT4, [S_2], #1
|
|
+ cmp DAT0, DAT4
|
|
+ bne 200f
|
|
+ tst S_2, #3
|
|
+ bne 172b
|
|
+174: /* Second source now 4-byte aligned; we have 0 or more bytes to go */
|
|
+ tst S_1, #3
|
|
+ bne 140f
|
|
+ memcmp_short_inner_loop 0
|
|
+140: memcmp_short_inner_loop 1
|
|
+
|
|
+200: /* Difference found: determine sign. */
|
|
+ movhi a1, #1
|
|
+ movlo a1, #-1
|
|
+ setend le
|
|
+ pop {DAT1-DAT6, pc}
|
|
+
|
|
+ .unreq S_1
|
|
+ .unreq S_2
|
|
+ .unreq N
|
|
+ .unreq DAT0
|
|
+ .unreq DAT1
|
|
+ .unreq DAT2
|
|
+ .unreq DAT3
|
|
+ .unreq DAT4
|
|
+ .unreq DAT5
|
|
+ .unreq DAT6
|
|
+ .unreq DAT7
|
|
+ .unreq OFF
|
|
+ENDPROC(memcmp)
|
|
--- /dev/null
|
|
+++ b/arch/arm/lib/memcpy_rpi.S
|
|
@@ -0,0 +1,61 @@
|
|
+/*
|
|
+Copyright (c) 2013, Raspberry Pi Foundation
|
|
+Copyright (c) 2013, RISC OS Open Ltd
|
|
+All rights reserved.
|
|
+
|
|
+Redistribution and use in source and binary forms, with or without
|
|
+modification, are permitted provided that the following conditions are met:
|
|
+ * Redistributions of source code must retain the above copyright
|
|
+ notice, this list of conditions and the following disclaimer.
|
|
+ * Redistributions in binary form must reproduce the above copyright
|
|
+ notice, this list of conditions and the following disclaimer in the
|
|
+ documentation and/or other materials provided with the distribution.
|
|
+ * Neither the name of the copyright holder nor the
|
|
+ names of its contributors may be used to endorse or promote products
|
|
+ derived from this software without specific prior written permission.
|
|
+
|
|
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
+*/
|
|
+
|
|
+#include <linux/linkage.h>
|
|
+#include "arm-mem.h"
|
|
+#include "memcpymove.h"
|
|
+
|
|
+/* Prevent the stack from becoming executable */
|
|
+#if defined(__linux__) && defined(__ELF__)
|
|
+.section .note.GNU-stack,"",%progbits
|
|
+#endif
|
|
+
|
|
+ .text
|
|
+ .arch armv6
|
|
+ .object_arch armv4
|
|
+ .arm
|
|
+ .altmacro
|
|
+ .p2align 2
|
|
+
|
|
+/*
|
|
+ * void *memcpy(void * restrict s1, const void * restrict s2, size_t n);
|
|
+ * On entry:
|
|
+ * a1 = pointer to destination
|
|
+ * a2 = pointer to source
|
|
+ * a3 = number of bytes to copy
|
|
+ * On exit:
|
|
+ * a1 preserved
|
|
+ */
|
|
+
|
|
+.set prefetch_distance, 3
|
|
+
|
|
+ENTRY(mmiocpy)
|
|
+ENTRY(memcpy)
|
|
+ memcpy 0
|
|
+ENDPROC(memcpy)
|
|
+ENDPROC(mmiocpy)
|
|
--- /dev/null
|
|
+++ b/arch/arm/lib/memcpymove.h
|
|
@@ -0,0 +1,506 @@
|
|
+/*
|
|
+Copyright (c) 2013, Raspberry Pi Foundation
|
|
+Copyright (c) 2013, RISC OS Open Ltd
|
|
+All rights reserved.
|
|
+
|
|
+Redistribution and use in source and binary forms, with or without
|
|
+modification, are permitted provided that the following conditions are met:
|
|
+ * Redistributions of source code must retain the above copyright
|
|
+ notice, this list of conditions and the following disclaimer.
|
|
+ * Redistributions in binary form must reproduce the above copyright
|
|
+ notice, this list of conditions and the following disclaimer in the
|
|
+ documentation and/or other materials provided with the distribution.
|
|
+ * Neither the name of the copyright holder nor the
|
|
+ names of its contributors may be used to endorse or promote products
|
|
+ derived from this software without specific prior written permission.
|
|
+
|
|
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
+*/
|
|
+
|
|
+.macro unaligned_words backwards, align, use_pld, words, r0, r1, r2, r3, r4, r5, r6, r7, r8
|
|
+ .if words == 1
|
|
+ .if backwards
|
|
+ mov r1, r0, lsl #32-align*8
|
|
+ ldr r0, [S, #-4]!
|
|
+ orr r1, r1, r0, lsr #align*8
|
|
+ str r1, [D, #-4]!
|
|
+ .else
|
|
+ mov r0, r1, lsr #align*8
|
|
+ ldr r1, [S, #4]!
|
|
+ orr r0, r0, r1, lsl #32-align*8
|
|
+ str r0, [D], #4
|
|
+ .endif
|
|
+ .elseif words == 2
|
|
+ .if backwards
|
|
+ ldr r1, [S, #-4]!
|
|
+ mov r2, r0, lsl #32-align*8
|
|
+ ldr r0, [S, #-4]!
|
|
+ orr r2, r2, r1, lsr #align*8
|
|
+ mov r1, r1, lsl #32-align*8
|
|
+ orr r1, r1, r0, lsr #align*8
|
|
+ stmdb D!, {r1, r2}
|
|
+ .else
|
|
+ ldr r1, [S, #4]!
|
|
+ mov r0, r2, lsr #align*8
|
|
+ ldr r2, [S, #4]!
|
|
+ orr r0, r0, r1, lsl #32-align*8
|
|
+ mov r1, r1, lsr #align*8
|
|
+ orr r1, r1, r2, lsl #32-align*8
|
|
+ stmia D!, {r0, r1}
|
|
+ .endif
|
|
+ .elseif words == 4
|
|
+ .if backwards
|
|
+ ldmdb S!, {r2, r3}
|
|
+ mov r4, r0, lsl #32-align*8
|
|
+ ldmdb S!, {r0, r1}
|
|
+ orr r4, r4, r3, lsr #align*8
|
|
+ mov r3, r3, lsl #32-align*8
|
|
+ orr r3, r3, r2, lsr #align*8
|
|
+ mov r2, r2, lsl #32-align*8
|
|
+ orr r2, r2, r1, lsr #align*8
|
|
+ mov r1, r1, lsl #32-align*8
|
|
+ orr r1, r1, r0, lsr #align*8
|
|
+ stmdb D!, {r1, r2, r3, r4}
|
|
+ .else
|
|
+ ldmib S!, {r1, r2}
|
|
+ mov r0, r4, lsr #align*8
|
|
+ ldmib S!, {r3, r4}
|
|
+ orr r0, r0, r1, lsl #32-align*8
|
|
+ mov r1, r1, lsr #align*8
|
|
+ orr r1, r1, r2, lsl #32-align*8
|
|
+ mov r2, r2, lsr #align*8
|
|
+ orr r2, r2, r3, lsl #32-align*8
|
|
+ mov r3, r3, lsr #align*8
|
|
+ orr r3, r3, r4, lsl #32-align*8
|
|
+ stmia D!, {r0, r1, r2, r3}
|
|
+ .endif
|
|
+ .elseif words == 8
|
|
+ .if backwards
|
|
+ ldmdb S!, {r4, r5, r6, r7}
|
|
+ mov r8, r0, lsl #32-align*8
|
|
+ ldmdb S!, {r0, r1, r2, r3}
|
|
+ .if use_pld
|
|
+ pld [S, OFF]
|
|
+ .endif
|
|
+ orr r8, r8, r7, lsr #align*8
|
|
+ mov r7, r7, lsl #32-align*8
|
|
+ orr r7, r7, r6, lsr #align*8
|
|
+ mov r6, r6, lsl #32-align*8
|
|
+ orr r6, r6, r5, lsr #align*8
|
|
+ mov r5, r5, lsl #32-align*8
|
|
+ orr r5, r5, r4, lsr #align*8
|
|
+ mov r4, r4, lsl #32-align*8
|
|
+ orr r4, r4, r3, lsr #align*8
|
|
+ mov r3, r3, lsl #32-align*8
|
|
+ orr r3, r3, r2, lsr #align*8
|
|
+ mov r2, r2, lsl #32-align*8
|
|
+ orr r2, r2, r1, lsr #align*8
|
|
+ mov r1, r1, lsl #32-align*8
|
|
+ orr r1, r1, r0, lsr #align*8
|
|
+ stmdb D!, {r5, r6, r7, r8}
|
|
+ stmdb D!, {r1, r2, r3, r4}
|
|
+ .else
|
|
+ ldmib S!, {r1, r2, r3, r4}
|
|
+ mov r0, r8, lsr #align*8
|
|
+ ldmib S!, {r5, r6, r7, r8}
|
|
+ .if use_pld
|
|
+ pld [S, OFF]
|
|
+ .endif
|
|
+ orr r0, r0, r1, lsl #32-align*8
|
|
+ mov r1, r1, lsr #align*8
|
|
+ orr r1, r1, r2, lsl #32-align*8
|
|
+ mov r2, r2, lsr #align*8
|
|
+ orr r2, r2, r3, lsl #32-align*8
|
|
+ mov r3, r3, lsr #align*8
|
|
+ orr r3, r3, r4, lsl #32-align*8
|
|
+ mov r4, r4, lsr #align*8
|
|
+ orr r4, r4, r5, lsl #32-align*8
|
|
+ mov r5, r5, lsr #align*8
|
|
+ orr r5, r5, r6, lsl #32-align*8
|
|
+ mov r6, r6, lsr #align*8
|
|
+ orr r6, r6, r7, lsl #32-align*8
|
|
+ mov r7, r7, lsr #align*8
|
|
+ orr r7, r7, r8, lsl #32-align*8
|
|
+ stmia D!, {r0, r1, r2, r3}
|
|
+ stmia D!, {r4, r5, r6, r7}
|
|
+ .endif
|
|
+ .endif
|
|
+.endm
|
|
+
|
|
+.macro memcpy_leading_15bytes backwards, align
|
|
+ movs DAT1, DAT2, lsl #31
|
|
+ sub N, N, DAT2
|
|
+ .if backwards
|
|
+ ldrmib DAT0, [S, #-1]!
|
|
+ ldrcsh DAT1, [S, #-2]!
|
|
+ strmib DAT0, [D, #-1]!
|
|
+ strcsh DAT1, [D, #-2]!
|
|
+ .else
|
|
+ ldrmib DAT0, [S], #1
|
|
+ ldrcsh DAT1, [S], #2
|
|
+ strmib DAT0, [D], #1
|
|
+ strcsh DAT1, [D], #2
|
|
+ .endif
|
|
+ movs DAT1, DAT2, lsl #29
|
|
+ .if backwards
|
|
+ ldrmi DAT0, [S, #-4]!
|
|
+ .if align == 0
|
|
+ ldmcsdb S!, {DAT1, DAT2}
|
|
+ .else
|
|
+ ldrcs DAT2, [S, #-4]!
|
|
+ ldrcs DAT1, [S, #-4]!
|
|
+ .endif
|
|
+ strmi DAT0, [D, #-4]!
|
|
+ stmcsdb D!, {DAT1, DAT2}
|
|
+ .else
|
|
+ ldrmi DAT0, [S], #4
|
|
+ .if align == 0
|
|
+ ldmcsia S!, {DAT1, DAT2}
|
|
+ .else
|
|
+ ldrcs DAT1, [S], #4
|
|
+ ldrcs DAT2, [S], #4
|
|
+ .endif
|
|
+ strmi DAT0, [D], #4
|
|
+ stmcsia D!, {DAT1, DAT2}
|
|
+ .endif
|
|
+.endm
|
|
+
|
|
+.macro memcpy_trailing_15bytes backwards, align
|
|
+ movs N, N, lsl #29
|
|
+ .if backwards
|
|
+ .if align == 0
|
|
+ ldmcsdb S!, {DAT0, DAT1}
|
|
+ .else
|
|
+ ldrcs DAT1, [S, #-4]!
|
|
+ ldrcs DAT0, [S, #-4]!
|
|
+ .endif
|
|
+ ldrmi DAT2, [S, #-4]!
|
|
+ stmcsdb D!, {DAT0, DAT1}
|
|
+ strmi DAT2, [D, #-4]!
|
|
+ .else
|
|
+ .if align == 0
|
|
+ ldmcsia S!, {DAT0, DAT1}
|
|
+ .else
|
|
+ ldrcs DAT0, [S], #4
|
|
+ ldrcs DAT1, [S], #4
|
|
+ .endif
|
|
+ ldrmi DAT2, [S], #4
|
|
+ stmcsia D!, {DAT0, DAT1}
|
|
+ strmi DAT2, [D], #4
|
|
+ .endif
|
|
+ movs N, N, lsl #2
|
|
+ .if backwards
|
|
+ ldrcsh DAT0, [S, #-2]!
|
|
+ ldrmib DAT1, [S, #-1]
|
|
+ strcsh DAT0, [D, #-2]!
|
|
+ strmib DAT1, [D, #-1]
|
|
+ .else
|
|
+ ldrcsh DAT0, [S], #2
|
|
+ ldrmib DAT1, [S]
|
|
+ strcsh DAT0, [D], #2
|
|
+ strmib DAT1, [D]
|
|
+ .endif
|
|
+.endm
|
|
+
|
|
+.macro memcpy_long_inner_loop backwards, align
|
|
+ .if align != 0
|
|
+ .if backwards
|
|
+ ldr DAT0, [S, #-align]!
|
|
+ .else
|
|
+ ldr LAST, [S, #-align]!
|
|
+ .endif
|
|
+ .endif
|
|
+110:
|
|
+ .if align == 0
|
|
+ .if backwards
|
|
+ ldmdb S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
|
|
+ pld [S, OFF]
|
|
+ stmdb D!, {DAT4, DAT5, DAT6, LAST}
|
|
+ stmdb D!, {DAT0, DAT1, DAT2, DAT3}
|
|
+ .else
|
|
+ ldmia S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
|
|
+ pld [S, OFF]
|
|
+ stmia D!, {DAT0, DAT1, DAT2, DAT3}
|
|
+ stmia D!, {DAT4, DAT5, DAT6, LAST}
|
|
+ .endif
|
|
+ .else
|
|
+ unaligned_words backwards, align, 1, 8, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7, LAST
|
|
+ .endif
|
|
+ subs N, N, #32
|
|
+ bhs 110b
|
|
+ /* Just before the final (prefetch_distance+1) 32-byte blocks, deal with final preloads */
|
|
+ preload_trailing backwards, S, N, OFF
|
|
+ add N, N, #(prefetch_distance+2)*32 - 32
|
|
+120:
|
|
+ .if align == 0
|
|
+ .if backwards
|
|
+ ldmdb S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
|
|
+ stmdb D!, {DAT4, DAT5, DAT6, LAST}
|
|
+ stmdb D!, {DAT0, DAT1, DAT2, DAT3}
|
|
+ .else
|
|
+ ldmia S!, {DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, LAST}
|
|
+ stmia D!, {DAT0, DAT1, DAT2, DAT3}
|
|
+ stmia D!, {DAT4, DAT5, DAT6, LAST}
|
|
+ .endif
|
|
+ .else
|
|
+ unaligned_words backwards, align, 0, 8, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7, LAST
|
|
+ .endif
|
|
+ subs N, N, #32
|
|
+ bhs 120b
|
|
+ tst N, #16
|
|
+ .if align == 0
|
|
+ .if backwards
|
|
+ ldmnedb S!, {DAT0, DAT1, DAT2, LAST}
|
|
+ stmnedb D!, {DAT0, DAT1, DAT2, LAST}
|
|
+ .else
|
|
+ ldmneia S!, {DAT0, DAT1, DAT2, LAST}
|
|
+ stmneia D!, {DAT0, DAT1, DAT2, LAST}
|
|
+ .endif
|
|
+ .else
|
|
+ beq 130f
|
|
+ unaligned_words backwards, align, 0, 4, DAT0, DAT1, DAT2, DAT3, LAST
|
|
+130:
|
|
+ .endif
|
|
+ /* Trailing words and bytes */
|
|
+ tst N, #15
|
|
+ beq 199f
|
|
+ .if align != 0
|
|
+ add S, S, #align
|
|
+ .endif
|
|
+ memcpy_trailing_15bytes backwards, align
|
|
+199:
|
|
+ pop {DAT3, DAT4, DAT5, DAT6, DAT7}
|
|
+ pop {D, DAT1, DAT2, pc}
|
|
+.endm
|
|
+
|
|
+.macro memcpy_medium_inner_loop backwards, align
|
|
+120:
|
|
+ .if backwards
|
|
+ .if align == 0
|
|
+ ldmdb S!, {DAT0, DAT1, DAT2, LAST}
|
|
+ .else
|
|
+ ldr LAST, [S, #-4]!
|
|
+ ldr DAT2, [S, #-4]!
|
|
+ ldr DAT1, [S, #-4]!
|
|
+ ldr DAT0, [S, #-4]!
|
|
+ .endif
|
|
+ stmdb D!, {DAT0, DAT1, DAT2, LAST}
|
|
+ .else
|
|
+ .if align == 0
|
|
+ ldmia S!, {DAT0, DAT1, DAT2, LAST}
|
|
+ .else
|
|
+ ldr DAT0, [S], #4
|
|
+ ldr DAT1, [S], #4
|
|
+ ldr DAT2, [S], #4
|
|
+ ldr LAST, [S], #4
|
|
+ .endif
|
|
+ stmia D!, {DAT0, DAT1, DAT2, LAST}
|
|
+ .endif
|
|
+ subs N, N, #16
|
|
+ bhs 120b
|
|
+ /* Trailing words and bytes */
|
|
+ tst N, #15
|
|
+ beq 199f
|
|
+ memcpy_trailing_15bytes backwards, align
|
|
+199:
|
|
+ pop {D, DAT1, DAT2, pc}
|
|
+.endm
|
|
+
|
|
+.macro memcpy_short_inner_loop backwards, align
|
|
+ tst N, #16
|
|
+ .if backwards
|
|
+ .if align == 0
|
|
+ ldmnedb S!, {DAT0, DAT1, DAT2, LAST}
|
|
+ .else
|
|
+ ldrne LAST, [S, #-4]!
|
|
+ ldrne DAT2, [S, #-4]!
|
|
+ ldrne DAT1, [S, #-4]!
|
|
+ ldrne DAT0, [S, #-4]!
|
|
+ .endif
|
|
+ stmnedb D!, {DAT0, DAT1, DAT2, LAST}
|
|
+ .else
|
|
+ .if align == 0
|
|
+ ldmneia S!, {DAT0, DAT1, DAT2, LAST}
|
|
+ .else
|
|
+ ldrne DAT0, [S], #4
|
|
+ ldrne DAT1, [S], #4
|
|
+ ldrne DAT2, [S], #4
|
|
+ ldrne LAST, [S], #4
|
|
+ .endif
|
|
+ stmneia D!, {DAT0, DAT1, DAT2, LAST}
|
|
+ .endif
|
|
+ memcpy_trailing_15bytes backwards, align
|
|
+199:
|
|
+ pop {D, DAT1, DAT2, pc}
|
|
+.endm
|
|
+
|
|
+.macro memcpy backwards
|
|
+ D .req a1
|
|
+ S .req a2
|
|
+ N .req a3
|
|
+ DAT0 .req a4
|
|
+ DAT1 .req v1
|
|
+ DAT2 .req v2
|
|
+ DAT3 .req v3
|
|
+ DAT4 .req v4
|
|
+ DAT5 .req v5
|
|
+ DAT6 .req v6
|
|
+ DAT7 .req sl
|
|
+ LAST .req ip
|
|
+ OFF .req lr
|
|
+
|
|
+ .cfi_startproc
|
|
+
|
|
+ push {D, DAT1, DAT2, lr}
|
|
+
|
|
+ .cfi_def_cfa_offset 16
|
|
+ .cfi_rel_offset D, 0
|
|
+ .cfi_undefined S
|
|
+ .cfi_undefined N
|
|
+ .cfi_undefined DAT0
|
|
+ .cfi_rel_offset DAT1, 4
|
|
+ .cfi_rel_offset DAT2, 8
|
|
+ .cfi_undefined LAST
|
|
+ .cfi_rel_offset lr, 12
|
|
+
|
|
+ .if backwards
|
|
+ add D, D, N
|
|
+ add S, S, N
|
|
+ .endif
|
|
+
|
|
+ /* See if we're guaranteed to have at least one 16-byte aligned 16-byte write */
|
|
+ cmp N, #31
|
|
+ blo 170f
|
|
+ /* To preload ahead as we go, we need at least (prefetch_distance+2) 32-byte blocks */
|
|
+ cmp N, #(prefetch_distance+3)*32 - 1
|
|
+ blo 160f
|
|
+
|
|
+ /* Long case */
|
|
+ push {DAT3, DAT4, DAT5, DAT6, DAT7}
|
|
+
|
|
+ .cfi_def_cfa_offset 36
|
|
+ .cfi_rel_offset D, 20
|
|
+ .cfi_rel_offset DAT1, 24
|
|
+ .cfi_rel_offset DAT2, 28
|
|
+ .cfi_rel_offset DAT3, 0
|
|
+ .cfi_rel_offset DAT4, 4
|
|
+ .cfi_rel_offset DAT5, 8
|
|
+ .cfi_rel_offset DAT6, 12
|
|
+ .cfi_rel_offset DAT7, 16
|
|
+ .cfi_rel_offset lr, 32
|
|
+
|
|
+ /* Adjust N so that the decrement instruction can also test for
|
|
+ * inner loop termination. We want it to stop when there are
|
|
+ * (prefetch_distance+1) complete blocks to go. */
|
|
+ sub N, N, #(prefetch_distance+2)*32
|
|
+ preload_leading_step1 backwards, DAT0, S
|
|
+ .if backwards
|
|
+ /* Bug in GAS: it accepts, but mis-assembles the instruction
|
|
+ * ands DAT2, D, #60, 2
|
|
+ * which sets DAT2 to the number of leading bytes until destination is aligned and also clears C (sets borrow)
|
|
+ */
|
|
+ .word 0xE210513C
|
|
+ beq 154f
|
|
+ .else
|
|
+ ands DAT2, D, #15
|
|
+ beq 154f
|
|
+ rsb DAT2, DAT2, #16 /* number of leading bytes until destination aligned */
|
|
+ .endif
|
|
+ preload_leading_step2 backwards, DAT0, S, DAT2, OFF
|
|
+ memcpy_leading_15bytes backwards, 1
|
|
+154: /* Destination now 16-byte aligned; we have at least one prefetch as well as at least one 16-byte output block */
|
|
+ /* Prefetch offset is best selected such that it lies in the first 8 of each 32 bytes - but it's just as easy to aim for the first one */
|
|
+ .if backwards
|
|
+ rsb OFF, S, #3
|
|
+ and OFF, OFF, #28
|
|
+ sub OFF, OFF, #32*(prefetch_distance+1)
|
|
+ .else
|
|
+ and OFF, S, #28
|
|
+ rsb OFF, OFF, #32*prefetch_distance
|
|
+ .endif
|
|
+ movs DAT0, S, lsl #31
|
|
+ bhi 157f
|
|
+ bcs 156f
|
|
+ bmi 155f
|
|
+ memcpy_long_inner_loop backwards, 0
|
|
+155: memcpy_long_inner_loop backwards, 1
|
|
+156: memcpy_long_inner_loop backwards, 2
|
|
+157: memcpy_long_inner_loop backwards, 3
|
|
+
|
|
+ .cfi_def_cfa_offset 16
|
|
+ .cfi_rel_offset D, 0
|
|
+ .cfi_rel_offset DAT1, 4
|
|
+ .cfi_rel_offset DAT2, 8
|
|
+ .cfi_same_value DAT3
|
|
+ .cfi_same_value DAT4
|
|
+ .cfi_same_value DAT5
|
|
+ .cfi_same_value DAT6
|
|
+ .cfi_same_value DAT7
|
|
+ .cfi_rel_offset lr, 12
|
|
+
|
|
+160: /* Medium case */
|
|
+ preload_all backwards, 0, 0, S, N, DAT2, OFF
|
|
+ sub N, N, #16 /* simplifies inner loop termination */
|
|
+ .if backwards
|
|
+ ands DAT2, D, #15
|
|
+ beq 164f
|
|
+ .else
|
|
+ ands DAT2, D, #15
|
|
+ beq 164f
|
|
+ rsb DAT2, DAT2, #16
|
|
+ .endif
|
|
+ memcpy_leading_15bytes backwards, align
|
|
+164: /* Destination now 16-byte aligned; we have at least one 16-byte output block */
|
|
+ tst S, #3
|
|
+ bne 140f
|
|
+ memcpy_medium_inner_loop backwards, 0
|
|
+140: memcpy_medium_inner_loop backwards, 1
|
|
+
|
|
+170: /* Short case, less than 31 bytes, so no guarantee of at least one 16-byte block */
|
|
+ teq N, #0
|
|
+ beq 199f
|
|
+ preload_all backwards, 1, 0, S, N, DAT2, LAST
|
|
+ tst D, #3
|
|
+ beq 174f
|
|
+172: subs N, N, #1
|
|
+ blo 199f
|
|
+ .if backwards
|
|
+ ldrb DAT0, [S, #-1]!
|
|
+ strb DAT0, [D, #-1]!
|
|
+ .else
|
|
+ ldrb DAT0, [S], #1
|
|
+ strb DAT0, [D], #1
|
|
+ .endif
|
|
+ tst D, #3
|
|
+ bne 172b
|
|
+174: /* Destination now 4-byte aligned; we have 0 or more output bytes to go */
|
|
+ tst S, #3
|
|
+ bne 140f
|
|
+ memcpy_short_inner_loop backwards, 0
|
|
+140: memcpy_short_inner_loop backwards, 1
|
|
+
|
|
+ .cfi_endproc
|
|
+
|
|
+ .unreq D
|
|
+ .unreq S
|
|
+ .unreq N
|
|
+ .unreq DAT0
|
|
+ .unreq DAT1
|
|
+ .unreq DAT2
|
|
+ .unreq DAT3
|
|
+ .unreq DAT4
|
|
+ .unreq DAT5
|
|
+ .unreq DAT6
|
|
+ .unreq DAT7
|
|
+ .unreq LAST
|
|
+ .unreq OFF
|
|
+.endm
|
|
--- /dev/null
|
|
+++ b/arch/arm/lib/memmove_rpi.S
|
|
@@ -0,0 +1,61 @@
|
|
+/*
|
|
+Copyright (c) 2013, Raspberry Pi Foundation
|
|
+Copyright (c) 2013, RISC OS Open Ltd
|
|
+All rights reserved.
|
|
+
|
|
+Redistribution and use in source and binary forms, with or without
|
|
+modification, are permitted provided that the following conditions are met:
|
|
+ * Redistributions of source code must retain the above copyright
|
|
+ notice, this list of conditions and the following disclaimer.
|
|
+ * Redistributions in binary form must reproduce the above copyright
|
|
+ notice, this list of conditions and the following disclaimer in the
|
|
+ documentation and/or other materials provided with the distribution.
|
|
+ * Neither the name of the copyright holder nor the
|
|
+ names of its contributors may be used to endorse or promote products
|
|
+ derived from this software without specific prior written permission.
|
|
+
|
|
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
+*/
|
|
+
|
|
+#include <linux/linkage.h>
|
|
+#include "arm-mem.h"
|
|
+#include "memcpymove.h"
|
|
+
|
|
+/* Prevent the stack from becoming executable */
|
|
+#if defined(__linux__) && defined(__ELF__)
|
|
+.section .note.GNU-stack,"",%progbits
|
|
+#endif
|
|
+
|
|
+ .text
|
|
+ .arch armv6
|
|
+ .object_arch armv4
|
|
+ .arm
|
|
+ .altmacro
|
|
+ .p2align 2
|
|
+
|
|
+/*
|
|
+ * void *memmove(void *s1, const void *s2, size_t n);
|
|
+ * On entry:
|
|
+ * a1 = pointer to destination
|
|
+ * a2 = pointer to source
|
|
+ * a3 = number of bytes to copy
|
|
+ * On exit:
|
|
+ * a1 preserved
|
|
+ */
|
|
+
|
|
+.set prefetch_distance, 3
|
|
+
|
|
+ENTRY(memmove)
|
|
+ cmp a2, a1
|
|
+ bpl memcpy /* pl works even over -1 - 0 and 0x7fffffff - 0x80000000 boundaries */
|
|
+ memcpy 1
|
|
+ENDPROC(memmove)
|
|
--- /dev/null
|
|
+++ b/arch/arm/lib/memset_rpi.S
|
|
@@ -0,0 +1,123 @@
|
|
+/*
|
|
+Copyright (c) 2013, Raspberry Pi Foundation
|
|
+Copyright (c) 2013, RISC OS Open Ltd
|
|
+All rights reserved.
|
|
+
|
|
+Redistribution and use in source and binary forms, with or without
|
|
+modification, are permitted provided that the following conditions are met:
|
|
+ * Redistributions of source code must retain the above copyright
|
|
+ notice, this list of conditions and the following disclaimer.
|
|
+ * Redistributions in binary form must reproduce the above copyright
|
|
+ notice, this list of conditions and the following disclaimer in the
|
|
+ documentation and/or other materials provided with the distribution.
|
|
+ * Neither the name of the copyright holder nor the
|
|
+ names of its contributors may be used to endorse or promote products
|
|
+ derived from this software without specific prior written permission.
|
|
+
|
|
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
|
|
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
+*/
|
|
+
|
|
+#include <linux/linkage.h>
|
|
+#include "arm-mem.h"
|
|
+
|
|
+/* Prevent the stack from becoming executable */
|
|
+#if defined(__linux__) && defined(__ELF__)
|
|
+.section .note.GNU-stack,"",%progbits
|
|
+#endif
|
|
+
|
|
+ .text
|
|
+ .arch armv6
|
|
+ .object_arch armv4
|
|
+ .arm
|
|
+ .altmacro
|
|
+ .p2align 2
|
|
+
|
|
+/*
|
|
+ * void *memset(void *s, int c, size_t n);
|
|
+ * On entry:
|
|
+ * a1 = pointer to buffer to fill
|
|
+ * a2 = byte pattern to fill with (caller-narrowed)
|
|
+ * a3 = number of bytes to fill
|
|
+ * On exit:
|
|
+ * a1 preserved
|
|
+ */
|
|
+ENTRY(mmioset)
|
|
+ENTRY(memset)
|
|
+ S .req a1
|
|
+ DAT0 .req a2
|
|
+ N .req a3
|
|
+ DAT1 .req a4
|
|
+ DAT2 .req ip
|
|
+ DAT3 .req lr
|
|
+
|
|
+ orr DAT0, DAT0, lsl #8
|
|
+ push {S, lr}
|
|
+ orr DAT0, DAT0, lsl #16
|
|
+ mov DAT1, DAT0
|
|
+
|
|
+ /* See if we're guaranteed to have at least one 16-byte aligned 16-byte write */
|
|
+ cmp N, #31
|
|
+ blo 170f
|
|
+
|
|
+161: sub N, N, #16 /* simplifies inner loop termination */
|
|
+ /* Leading words and bytes */
|
|
+ tst S, #15
|
|
+ beq 164f
|
|
+ rsb DAT3, S, #0 /* bits 0-3 = number of leading bytes until aligned */
|
|
+ movs DAT2, DAT3, lsl #31
|
|
+ submi N, N, #1
|
|
+ strmib DAT0, [S], #1
|
|
+ subcs N, N, #2
|
|
+ strcsh DAT0, [S], #2
|
|
+ movs DAT2, DAT3, lsl #29
|
|
+ submi N, N, #4
|
|
+ strmi DAT0, [S], #4
|
|
+ subcs N, N, #8
|
|
+ stmcsia S!, {DAT0, DAT1}
|
|
+164: /* Delayed set up of DAT2 and DAT3 so we could use them as scratch registers above */
|
|
+ mov DAT2, DAT0
|
|
+ mov DAT3, DAT0
|
|
+ /* Now the inner loop of 16-byte stores */
|
|
+165: stmia S!, {DAT0, DAT1, DAT2, DAT3}
|
|
+ subs N, N, #16
|
|
+ bhs 165b
|
|
+166: /* Trailing words and bytes */
|
|
+ movs N, N, lsl #29
|
|
+ stmcsia S!, {DAT0, DAT1}
|
|
+ strmi DAT0, [S], #4
|
|
+ movs N, N, lsl #2
|
|
+ strcsh DAT0, [S], #2
|
|
+ strmib DAT0, [S]
|
|
+199: pop {S, pc}
|
|
+
|
|
+170: /* Short case */
|
|
+ mov DAT2, DAT0
|
|
+ mov DAT3, DAT0
|
|
+ tst S, #3
|
|
+ beq 174f
|
|
+172: subs N, N, #1
|
|
+ blo 199b
|
|
+ strb DAT0, [S], #1
|
|
+ tst S, #3
|
|
+ bne 172b
|
|
+174: tst N, #16
|
|
+ stmneia S!, {DAT0, DAT1, DAT2, DAT3}
|
|
+ b 166b
|
|
+
|
|
+ .unreq S
|
|
+ .unreq DAT0
|
|
+ .unreq N
|
|
+ .unreq DAT1
|
|
+ .unreq DAT2
|
|
+ .unreq DAT3
|
|
+ENDPROC(memset)
|
|
+ENDPROC(mmioset)
|
|
--- a/arch/arm/lib/uaccess_with_memcpy.c
|
|
+++ b/arch/arm/lib/uaccess_with_memcpy.c
|
|
@@ -22,6 +22,14 @@
|
|
#include <asm/current.h>
|
|
#include <asm/page.h>
|
|
|
|
+#ifndef COPY_FROM_USER_THRESHOLD
|
|
+#define COPY_FROM_USER_THRESHOLD 64
|
|
+#endif
|
|
+
|
|
+#ifndef COPY_TO_USER_THRESHOLD
|
|
+#define COPY_TO_USER_THRESHOLD 64
|
|
+#endif
|
|
+
|
|
static int
|
|
pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
|
|
{
|
|
@@ -84,7 +92,44 @@ pin_page_for_write(const void __user *_a
|
|
return 1;
|
|
}
|
|
|
|
-static unsigned long noinline
|
|
+static int
|
|
+pin_page_for_read(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
|
|
+{
|
|
+ unsigned long addr = (unsigned long)_addr;
|
|
+ pgd_t *pgd;
|
|
+ pmd_t *pmd;
|
|
+ pte_t *pte;
|
|
+ pud_t *pud;
|
|
+ spinlock_t *ptl;
|
|
+
|
|
+ pgd = pgd_offset(current->mm, addr);
|
|
+ if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
|
|
+ {
|
|
+ return 0;
|
|
+ }
|
|
+ pud = pud_offset(pgd, addr);
|
|
+ if (unlikely(pud_none(*pud) || pud_bad(*pud)))
|
|
+ {
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ pmd = pmd_offset(pud, addr);
|
|
+ if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
|
|
+ return 0;
|
|
+
|
|
+ pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
|
|
+ if (unlikely(!pte_present(*pte) || !pte_young(*pte))) {
|
|
+ pte_unmap_unlock(pte, ptl);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ *ptep = pte;
|
|
+ *ptlp = ptl;
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+unsigned long noinline
|
|
__copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
|
|
{
|
|
unsigned long ua_flags;
|
|
@@ -137,6 +182,57 @@ out:
|
|
return n;
|
|
}
|
|
|
|
+unsigned long noinline
|
|
+__copy_from_user_memcpy(void *to, const void __user *from, unsigned long n)
|
|
+{
|
|
+ unsigned long ua_flags;
|
|
+ int atomic;
|
|
+
|
|
+ if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
|
|
+ memcpy(to, (const void *)from, n);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ /* the mmap semaphore is taken only if not in an atomic context */
|
|
+ atomic = in_atomic();
|
|
+
|
|
+ if (!atomic)
|
|
+ down_read(¤t->mm->mmap_sem);
|
|
+ while (n) {
|
|
+ pte_t *pte;
|
|
+ spinlock_t *ptl;
|
|
+ int tocopy;
|
|
+
|
|
+ while (!pin_page_for_read(from, &pte, &ptl)) {
|
|
+ char temp;
|
|
+ if (!atomic)
|
|
+ up_read(¤t->mm->mmap_sem);
|
|
+ if (__get_user(temp, (char __user *)from))
|
|
+ goto out;
|
|
+ if (!atomic)
|
|
+ down_read(¤t->mm->mmap_sem);
|
|
+ }
|
|
+
|
|
+ tocopy = (~(unsigned long)from & ~PAGE_MASK) + 1;
|
|
+ if (tocopy > n)
|
|
+ tocopy = n;
|
|
+
|
|
+ ua_flags = uaccess_save_and_enable();
|
|
+ memcpy(to, (const void *)from, tocopy);
|
|
+ uaccess_restore(ua_flags);
|
|
+ to += tocopy;
|
|
+ from += tocopy;
|
|
+ n -= tocopy;
|
|
+
|
|
+ pte_unmap_unlock(pte, ptl);
|
|
+ }
|
|
+ if (!atomic)
|
|
+ up_read(¤t->mm->mmap_sem);
|
|
+
|
|
+out:
|
|
+ return n;
|
|
+}
|
|
+
|
|
unsigned long
|
|
arm_copy_to_user(void __user *to, const void *from, unsigned long n)
|
|
{
|
|
@@ -147,7 +243,7 @@ arm_copy_to_user(void __user *to, const
|
|
* With frame pointer disabled, tail call optimization kicks in
|
|
* as well making this test almost invisible.
|
|
*/
|
|
- if (n < 64) {
|
|
+ if (n < COPY_TO_USER_THRESHOLD) {
|
|
unsigned long ua_flags = uaccess_save_and_enable();
|
|
n = __copy_to_user_std(to, from, n);
|
|
uaccess_restore(ua_flags);
|
|
@@ -156,6 +252,26 @@ arm_copy_to_user(void __user *to, const
|
|
}
|
|
return n;
|
|
}
|
|
+
|
|
+unsigned long __must_check
|
|
+arm_copy_from_user(void *to, const void __user *from, unsigned long n)
|
|
+{
|
|
+ /*
|
|
+ * This test is stubbed out of the main function above to keep
|
|
+ * the overhead for small copies low by avoiding a large
|
|
+ * register dump on the stack just to reload them right away.
|
|
+ * With frame pointer disabled, tail call optimization kicks in
|
|
+ * as well making this test almost invisible.
|
|
+ */
|
|
+	if (n < COPY_FROM_USER_THRESHOLD) {
|
|
+ unsigned long ua_flags = uaccess_save_and_enable();
|
|
+ n = __copy_from_user_std(to, from, n);
|
|
+ uaccess_restore(ua_flags);
|
|
+ } else {
|
|
+ n = __copy_from_user_memcpy(to, from, n);
|
|
+ }
|
|
+ return n;
|
|
+}
|
|
|
|
static unsigned long noinline
|
|
__clear_user_memset(void __user *addr, unsigned long n)
|
|
--- a/arch/arm/mach-bcm/Kconfig
|
|
+++ b/arch/arm/mach-bcm/Kconfig
|
|
@@ -174,6 +174,13 @@ config ARCH_BCM_53573
|
|
The base chip is BCM53573 and there are some packaging modifications
|
|
like BCM47189 and BCM47452.
|
|
|
|
+config BCM2835_FAST_MEMCPY
|
|
+ bool "Enable optimized __copy_to_user and __copy_from_user"
|
|
+ depends on ARCH_BCM2835 && ARCH_MULTI_V6
|
|
+ default y
|
|
+ help
|
|
+ Optimized versions of __copy_to_user and __copy_from_user for Pi1.
|
|
+
|
|
config ARCH_BCM_63XX
|
|
bool "Broadcom BCM63xx DSL SoC"
|
|
depends on ARCH_MULTI_V7
|
|
|