MEMCOPY: use glibc version

file:a7b157391fd42b80bbb87aece2d5357d942dfc5c(new)
--- /dev/null
+++ b/include/linux/memcopy.h
@@ -0,0 +1,226 @@
+/*
+ * memcopy.h -- definitions for memory copy functions. Generic C version.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+ * Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * The code is derived from the GNU C Library.
+ * Copyright (C) 1991, 1992, 1993, 1997, 2004 Free Software Foundation, Inc.
+ */
+#ifndef _LINUX_MEMCOPY_H_
+#define _LINUX_MEMCOPY_H_
+
+/*
+ * The strategy of the memory functions is:
+ *
+ * 1. Copy bytes until the destination pointer is aligned.
+ *
+ * 2. Copy words in unrolled loops. If the source and destination
+ * are not aligned in the same way, use word memory operations,
+ * but shift and merge two read words before writing.
+ *
+ * 3. Copy the few remaining bytes.
+ *
+ * This is fast on processors that have at least 10 registers for
+ * allocation by GCC, and that can access memory at reg+const in one
+ * instruction.
+ */
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <asm/byteorder.h>
+
+/*
+ * The macros defined in this file are:
+ *
+ * BYTE_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_to_copy)
+ *
+ * BYTE_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_to_copy)
+ *
+ * WORD_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_remaining, nbytes_to_copy)
+ *
+ * WORD_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_remaining, nbytes_to_copy)
+ *
+ * MERGE(old_word, sh_1, new_word, sh_2)
+ *
+ * MEM_COPY_FWD(dst_beg_ptr, src_beg_ptr, nbytes_to_copy)
+ *
+ * MEM_COPY_BWD(dst_end_ptr, src_end_ptr, nbytes_to_copy)
+ */
+
+#define OP_T_THRESHOLD 16
+
+/*
+ * Type to use for aligned memory operations.
+ * This should normally be the biggest type supported by a single load
+ * and store.
+ */
+#define op_t unsigned long int
+#define OPSIZ (sizeof(op_t))
+
+/* Type to use for unaligned operations. */
+typedef unsigned char byte;
+
+#ifndef MERGE
+# ifdef __LITTLE_ENDIAN
+# define MERGE(w0, sh_1, w1, sh_2) (((w0) >> (sh_1)) | ((w1) << (sh_2)))
+# elif defined(__BIG_ENDIAN)
+# define MERGE(w0, sh_1, w1, sh_2) (((w0) << (sh_1)) | ((w1) >> (sh_2)))
+# else
+# error "Macro MERGE() hasn't defined!"
+# endif
+#endif
+
+/*
+ * Copy exactly NBYTES bytes from SRC_BP to DST_BP,
+ * without any assumptions about alignment of the pointers.
+ */
+#ifndef BYTE_COPY_FWD
+#define BYTE_COPY_FWD(dst_bp, src_bp, nbytes) \
+do { \
+ size_t __nbytes = (nbytes); \
+ while (__nbytes > 0) { \
+ byte __x = ((byte *) src_bp)[0]; \
+ src_bp += 1; \
+ __nbytes -= 1; \
+ ((byte *) dst_bp)[0] = __x; \
+ dst_bp += 1; \
+ } \
+} while (0)
+#endif
+
+/*
+ * Copy exactly NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR,
+ * beginning at the bytes right before the pointers and continuing towards
+ * smaller addresses. Don't assume anything about alignment of the
+ * pointers.
+ */
+#ifndef BYTE_COPY_BWD
+#define BYTE_COPY_BWD(dst_ep, src_ep, nbytes) \
+do { \
+ size_t __nbytes = (nbytes); \
+ while (__nbytes > 0) { \
+ byte __x; \
+ src_ep -= 1; \
+ __x = ((byte *) src_ep)[0]; \
+ dst_ep -= 1; \
+ __nbytes -= 1; \
+ ((byte *) dst_ep)[0] = __x; \
+ } \
+} while (0)
+#endif
+/*
+ * Copy *up to* NBYTES bytes from SRC_BP to DST_BP, with
+ * the assumption that DST_BP is aligned on an OPSIZ multiple. If
+ * not all bytes could be easily copied, store remaining number of bytes
+ * in NBYTES_LEFT, otherwise store 0.
+ */
+extern void _wordcopy_fwd_aligned(long int, long int, size_t);
+extern void _wordcopy_fwd_dest_aligned(long int, long int, size_t);
+#ifndef WORD_COPY_FWD
+#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \
+do { \
+ if (src_bp % OPSIZ == 0) \
+ _wordcopy_fwd_aligned (dst_bp, src_bp, (nbytes) / OPSIZ); \
+ else \
+ _wordcopy_fwd_dest_aligned (dst_bp, src_bp, (nbytes) / OPSIZ);\
+ \
+ src_bp += (nbytes) & -OPSIZ; \
+ dst_bp += (nbytes) & -OPSIZ; \
+ (nbytes_left) = (nbytes) % OPSIZ; \
+} while (0)
+#endif
+
+/*
+ * Copy *up to* NBYTES_TO_COPY bytes from SRC_END_PTR to DST_END_PTR,
+ * beginning at the words (of type op_t) right before the pointers and
+ * continuing towards smaller addresses. May take advantage of that
+ * DST_END_PTR is aligned on an OPSIZ multiple. If not all bytes could be
+ * easily copied, store remaining number of bytes in NBYTES_REMAINING,
+ * otherwise store 0.
+ */
+extern void _wordcopy_bwd_aligned(long int, long int, size_t);
+extern void _wordcopy_bwd_dest_aligned(long int, long int, size_t);
+#ifndef WORD_COPY_BWD
+#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes) \
+do { \
+ if (src_ep % OPSIZ == 0) \
+ _wordcopy_bwd_aligned (dst_ep, src_ep, (nbytes) / OPSIZ); \
+ else \
+ _wordcopy_bwd_dest_aligned (dst_ep, src_ep, (nbytes) / OPSIZ);\
+ \
+ src_ep -= (nbytes) & -OPSIZ; \
+ dst_ep -= (nbytes) & -OPSIZ; \
+ (nbytes_left) = (nbytes) % OPSIZ; \
+} while (0)
+#endif
+
+/* Copy memory from the beginning to the end */
+#ifndef MEM_COPY_FWD
+static __always_inline void mem_copy_fwd(unsigned long dstp,
+ unsigned long srcp,
+ size_t count)
+{
+ /* If there not too few bytes to copy, use word copy. */
+ if (count >= OP_T_THRESHOLD) {
+ /* Copy just a few bytes to make dstp aligned. */
+ count -= (-dstp) % OPSIZ;
+ BYTE_COPY_FWD(dstp, srcp, (-dstp) % OPSIZ);
+
+ /*
+ * Copy from srcp to dstp taking advantage of the known
+ * alignment of dstp. Number if bytes remaining is put in
+ * the third argument.
+ */
+ WORD_COPY_FWD(dstp, srcp, count, count);
+
+ /* Fall out and copy the tail. */
+ }
+
+ /* There are just a few bytes to copy. Use byte memory operations. */
+ BYTE_COPY_FWD(dstp, srcp, count);
+}
+#endif
+
+/* Copy memory from the end to the beginning. */
+#ifndef MEM_COPY_BWD
+static __always_inline void mem_copy_bwd(unsigned long dstp,
+ unsigned long srcp,
+ size_t count)
+{
+ srcp += count;
+ dstp += count;
+
+ /* If there not too few bytes to copy, use word copy. */
+ if (count >= OP_T_THRESHOLD) {
+ /* Copy just a few bytes to make dstp aligned. */
+ count -= dstp % OPSIZ;
+ BYTE_COPY_BWD(dstp, srcp, dstp % OPSIZ);
+
+ /*
+ * Copy from srcp to dstp taking advantage of the known
+ * alignment of dstp. Number if bytes remaining is put in
+ * the third argument.
+ */
+ WORD_COPY_BWD(dstp, srcp, count, count);
+
+ /* Fall out and copy the tail. */
+ }
+
+ /* There are just a few bytes to copy. Use byte memory operations. */
+ BYTE_COPY_BWD (dstp, srcp, count);
+}
+#endif
+
+#endif
file:578414a21471ca2d8469504e4205043bde6378d2 -> file:7bd1afa496c7a6f2654adcca763ececec9fb440c
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -12,7 +12,8 @@ lib-y := ctype.o string.o vsprintf.o cmd
idr.o int_sqrt.o extable.o prio_tree.o \
sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
proportions.o prio_heap.o ratelimit.o show_mem.o \
- is_single_threaded.o plist.o decompress.o find_next_bit.o
+ is_single_threaded.o plist.o decompress.o find_next_bit.o \
+ memcopy.o
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
file:d2d3376f73895d654b8629a87ceecffc05fe5fe7(new)
--- /dev/null
+++ b/lib/memcopy.c
@@ -0,0 +1,403 @@
+/*
+ * memcopy.c -- subroutines for memory copy functions.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+ * Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * The code is derived from the GNU C Library.
+ * Copyright (C) 1991, 1992, 1993, 1997, 2004 Free Software Foundation, Inc.
+ */
+
+/* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */
+
+#include <linux/memcopy.h>
+
+/*
+ * _wordcopy_fwd_aligned -- Copy block beginning at SRCP to block beginning
+ * at DSTP with LEN `op_t' words (not LEN bytes!).
+ * Both SRCP and DSTP should be aligned for memory operations on `op_t's.
+ */
+void _wordcopy_fwd_aligned (long int dstp, long int srcp, size_t len)
+{
+ op_t a0, a1;
+
+ switch (len % 8) {
+ case 2:
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 6 * OPSIZ;
+ dstp -= 7 * OPSIZ;
+ len += 6;
+ goto do1;
+ case 3:
+ a1 = ((op_t *) srcp)[0];
+ srcp -= 5 * OPSIZ;
+ dstp -= 6 * OPSIZ;
+ len += 5;
+ goto do2;
+ case 4:
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 4 * OPSIZ;
+ dstp -= 5 * OPSIZ;
+ len += 4;
+ goto do3;
+ case 5:
+ a1 = ((op_t *) srcp)[0];
+ srcp -= 3 * OPSIZ;
+ dstp -= 4 * OPSIZ;
+ len += 3;
+ goto do4;
+ case 6:
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 2 * OPSIZ;
+ dstp -= 3 * OPSIZ;
+ len += 2;
+ goto do5;
+ case 7:
+ a1 = ((op_t *) srcp)[0];
+ srcp -= 1 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+ len += 1;
+ goto do6;
+ case 0:
+ if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0)
+ return;
+ a0 = ((op_t *) srcp)[0];
+ srcp -= 0 * OPSIZ;
+ dstp -= 1 * OPSIZ;
+ goto do7;
+ case 1:
+ a1 = ((op_t *) srcp)[0];
+ srcp -=-1 * OPSIZ;
+ dstp -= 0 * OPSIZ;
+ len -= 1;
+ if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0)
+ goto do0;
+ goto do8; /* No-op. */
+ }
+
+ do {
+do8:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a1;
+do7:
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[1] = a0;
+do6:
+ a0 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[2] = a1;
+do5:
+ a1 = ((op_t *) srcp)[3];
+ ((op_t *) dstp)[3] = a0;
+do4:
+ a0 = ((op_t *) srcp)[4];
+ ((op_t *) dstp)[4] = a1;
+do3:
+ a1 = ((op_t *) srcp)[5];
+ ((op_t *) dstp)[5] = a0;
+do2:
+ a0 = ((op_t *) srcp)[6];
+ ((op_t *) dstp)[6] = a1;
+do1:
+ a1 = ((op_t *) srcp)[7];
+ ((op_t *) dstp)[7] = a0;
+
+ srcp += 8 * OPSIZ;
+ dstp += 8 * OPSIZ;
+ len -= 8;
+ } while (len != 0);
+
+ /*
+ * This is the right position for do0. Please don't move it into
+ * the loop.
+ */
+do0:
+ ((op_t *) dstp)[0] = a1;
+}
+
+/*
+ * _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to block
+ * beginning at DSTP with LEN `op_t' words (not LEN bytes!). DSTP should
+ * be aligned for memory operations on `op_t's, but SRCP must *not* be aligned.
+ */
+
+void _wordcopy_fwd_dest_aligned (long int dstp, long int srcp, size_t len)
+{
+ op_t a0, a1, a2, a3;
+ int sh_1, sh_2;
+
+ /*
+ * Calculate how to shift a word read at the memory operation aligned
+ * srcp to make it aligned for copy.
+ */
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /*
+ * Make SRCP aligned by rounding it down to the beginning of the `op_t'
+ * it points in the middle of.
+ */
+ srcp &= -OPSIZ;
+
+ switch (len % 4) {
+ case 2:
+ a1 = ((op_t *) srcp)[0];
+ a2 = ((op_t *) srcp)[1];
+ srcp -= 1 * OPSIZ;
+ dstp -= 3 * OPSIZ;
+ len += 2;
+ goto do1;
+ case 3:
+ a0 = ((op_t *) srcp)[0];
+ a1 = ((op_t *) srcp)[1];
+ srcp -= 0 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+ len += 1;
+ goto do2;
+ case 0:
+ if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0)
+ return;
+ a3 = ((op_t *) srcp)[0];
+ a0 = ((op_t *) srcp)[1];
+ srcp -=-1 * OPSIZ;
+ dstp -= 1 * OPSIZ;
+ len += 0;
+ goto do3;
+ case 1:
+ a2 = ((op_t *) srcp)[0];
+ a3 = ((op_t *) srcp)[1];
+ srcp -=-2 * OPSIZ;
+ dstp -= 0 * OPSIZ;
+ len -= 1;
+ if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0)
+ goto do0;
+ goto do4; /* No-op. */
+ }
+
+ do {
+do4:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
+do3:
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2);
+do2:
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2);
+do1:
+ a3 = ((op_t *) srcp)[3];
+ ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2);
+
+ srcp += 4 * OPSIZ;
+ dstp += 4 * OPSIZ;
+ len -= 4;
+ } while (len != 0);
+
+ /*
+ * This is the right position for do0. Please don't move it into
+ * the loop.
+ */
+do0:
+ ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2);
+}
+
+/*
+ * _wordcopy_bwd_aligned -- Copy block finishing right before
+ * SRCP to block finishing right before DSTP with LEN `op_t' words (not LEN
+ * bytes!). Both SRCP and DSTP should be aligned for memory operations
+ * on `op_t's.
+ */
+void _wordcopy_bwd_aligned (long int dstp, long int srcp, size_t len)
+{
+ op_t a0, a1;
+
+ switch (len % 8) {
+ case 2:
+ srcp -= 2 * OPSIZ;
+ dstp -= 1 * OPSIZ;
+ a0 = ((op_t *) srcp)[1];
+ len += 6;
+ goto do1;
+ case 3:
+ srcp -= 3 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+ a1 = ((op_t *) srcp)[2];
+ len += 5;
+ goto do2;
+ case 4:
+ srcp -= 4 * OPSIZ;
+ dstp -= 3 * OPSIZ;
+ a0 = ((op_t *) srcp)[3];
+ len += 4;
+ goto do3;
+ case 5:
+ srcp -= 5 * OPSIZ;
+ dstp -= 4 * OPSIZ;
+ a1 = ((op_t *) srcp)[4];
+ len += 3;
+ goto do4;
+ case 6:
+ srcp -= 6 * OPSIZ;
+ dstp -= 5 * OPSIZ;
+ a0 = ((op_t *) srcp)[5];
+ len += 2;
+ goto do5;
+ case 7:
+ srcp -= 7 * OPSIZ;
+ dstp -= 6 * OPSIZ;
+ a1 = ((op_t *) srcp)[6];
+ len += 1;
+ goto do6;
+ case 0:
+ if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0)
+ return;
+ srcp -= 8 * OPSIZ;
+ dstp -= 7 * OPSIZ;
+ a0 = ((op_t *) srcp)[7];
+ goto do7;
+ case 1:
+ srcp -= 9 * OPSIZ;
+ dstp -= 8 * OPSIZ;
+ a1 = ((op_t *) srcp)[8];
+ len -= 1;
+ if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0)
+ goto do0;
+ goto do8; /* No-op. */
+ }
+
+ do {
+do8:
+ a0 = ((op_t *) srcp)[7];
+ ((op_t *) dstp)[7] = a1;
+do7:
+ a1 = ((op_t *) srcp)[6];
+ ((op_t *) dstp)[6] = a0;
+do6:
+ a0 = ((op_t *) srcp)[5];
+ ((op_t *) dstp)[5] = a1;
+do5:
+ a1 = ((op_t *) srcp)[4];
+ ((op_t *) dstp)[4] = a0;
+do4:
+ a0 = ((op_t *) srcp)[3];
+ ((op_t *) dstp)[3] = a1;
+do3:
+ a1 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[2] = a0;
+do2:
+ a0 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[1] = a1;
+do1:
+ a1 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = a0;
+
+ srcp -= 8 * OPSIZ;
+ dstp -= 8 * OPSIZ;
+ len -= 8;
+ } while (len != 0);
+
+ /*
+ * This is the right position for do0. Please don't move it into
+ * the loop.
+ */
+do0:
+ ((op_t *) dstp)[7] = a1;
+}
+
+/*
+ * _wordcopy_bwd_dest_aligned -- Copy block finishing right before SRCP to
+ * block finishing right before DSTP with LEN `op_t' words (not LEN bytes!).
+ * DSTP should be aligned for memory operations on `op_t', but SRCP must *not*
+ * be aligned.
+ */
+void _wordcopy_bwd_dest_aligned (long int dstp, long int srcp, size_t len)
+{
+ op_t a0, a1, a2, a3;
+ int sh_1, sh_2;
+
+ /*
+ * Calculate how to shift a word read at the memory operation aligned
+ * srcp to make it aligned for copy.
+ */
+
+ sh_1 = 8 * (srcp % OPSIZ);
+ sh_2 = 8 * OPSIZ - sh_1;
+
+ /*
+ * Make srcp aligned by rounding it down to the beginning of the op_t
+ * it points in the middle of.
+ */
+ srcp &= -OPSIZ;
+ srcp += OPSIZ;
+
+ switch (len % 4) {
+ case 2:
+ srcp -= 3 * OPSIZ;
+ dstp -= 1 * OPSIZ;
+ a2 = ((op_t *) srcp)[2];
+ a1 = ((op_t *) srcp)[1];
+ len += 2;
+ goto do1;
+ case 3:
+ srcp -= 4 * OPSIZ;
+ dstp -= 2 * OPSIZ;
+ a3 = ((op_t *) srcp)[3];
+ a2 = ((op_t *) srcp)[2];
+ len += 1;
+ goto do2;
+ case 0:
+ if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0)
+ return;
+ srcp -= 5 * OPSIZ;
+ dstp -= 3 * OPSIZ;
+ a0 = ((op_t *) srcp)[4];
+ a3 = ((op_t *) srcp)[3];
+ goto do3;
+ case 1:
+ srcp -= 6 * OPSIZ;
+ dstp -= 4 * OPSIZ;
+ a1 = ((op_t *) srcp)[5];
+ a0 = ((op_t *) srcp)[4];
+ len -= 1;
+ if (OP_T_THRESHOLD <= 3 * OPSIZ && len == 0)
+ goto do0;
+ goto do4; /* No-op. */
+ }
+
+ do {
+do4:
+ a3 = ((op_t *) srcp)[3];
+ ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
+do3:
+ a2 = ((op_t *) srcp)[2];
+ ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2);
+do2:
+ a1 = ((op_t *) srcp)[1];
+ ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2);
+do1:
+ a0 = ((op_t *) srcp)[0];
+ ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2);
+
+ srcp -= 4 * OPSIZ;
+ dstp -= 4 * OPSIZ;
+ len -= 4;
+ } while (len != 0);
+
+ /*
+ * This is the right position for do0. Please don't move it into
+ * the loop.
+ */
+do0:
+ ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2);
+}
+