[PATCH 2/9] MIPS: Optimise core library functions for microMIPS.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]


From: "Steven J. Hill" <sjhill@xxxxxxxx>

Optimise some of the core library functions to use microMIPS
instructions in order to reduce binary size. When the microMIPS
ISA is not being used, the library functions compile to identical
binary code.

Signed-off-by: Steven J. Hill <sjhill@xxxxxxxx>
---
 arch/mips/include/asm/page.h |    6 +++
 arch/mips/lib/memcpy.S       |   17 ++++++--
 arch/mips/lib/memset.S       |   90 ++++++++++++++++++++++++++++++------------
 arch/mips/lib/strlen_user.S  |   13 ++++--
 arch/mips/lib/strncpy_user.S |   39 +++++++++---------
 arch/mips/lib/strnlen_user.S |   24 ++++++++---
 arch/mips/mm/page.c          |   26 ++++++------
 7 files changed, 147 insertions(+), 68 deletions(-)

diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index da9bd7d..5767678 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -45,6 +45,12 @@
 #define HUGETLB_PAGE_ORDER	({BUILD_BUG(); 0; })
 #endif /* CONFIG_HUGETLB_PAGE */
 
+/*
+ * Sizes (in bytes) of the micro-assembled clear_page/copy_page code buffers.
+ */
+#define CLEAR_PAGE_ARRAY_SIZE	288
+#define COPY_PAGE_ARRAY_SIZE	1344
+
 #ifndef __ASSEMBLY__
 
 #include <linux/pfn.h>
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index 56a1f85..0fa4617 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -10,6 +10,7 @@
  * Copyright (C) 2002 Broadcom, Inc.
  *   memcpy/copy_user author: Mark Vandevoorde
  * Copyright (C) 2007  Maciej W. Rozycki
+ * Copyright (C) 2011  MIPS Technologies, Inc.
  *
  * Mnemonic names for arguments to memcpy/__copy_user
  */
@@ -21,16 +22,14 @@
  * end of memory on some systems.  It's also a seriously bad idea on non
  * dma-coherent systems.
  */
-#ifdef CONFIG_DMA_NONCOHERENT
-#undef CONFIG_CPU_HAS_PREFETCH
-#endif
-#ifdef CONFIG_MIPS_MALTA
+#if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_MIPS_MALTA)
 #undef CONFIG_CPU_HAS_PREFETCH
 #endif
 
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/regdef.h>
+#include <asm/page.h>
 
 #define dst a0
 #define src a1
@@ -564,3 +563,13 @@ LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
 	jr	ra
 	 move	a2, zero
 	END(__rmemcpy)
+
+LEAF(clear_page)
+1:	j	1b		/* Dummy, will be replaced. */
+	.space CLEAR_PAGE_ARRAY_SIZE
+	END(clear_page)
+
+LEAF(copy_page)
+1:	j	1b		/* Dummy, will be replaced. */
+	.space COPY_PAGE_ARRAY_SIZE
+	END(copy_page)
diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
index 606c8a9..a0df003 100644
--- a/arch/mips/lib/memset.S
+++ b/arch/mips/lib/memset.S
@@ -5,7 +5,8 @@
  *
  * Copyright (C) 1998, 1999, 2000 by Ralf Baechle
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
- * Copyright (C) 2007  Maciej W. Rozycki
+ * Copyright (C) 2007 by Maciej W. Rozycki
+ * Copyright (C) 2011 MIPS Technologies, Inc.
  */
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
@@ -19,6 +20,14 @@
 #define LONG_S_R sdr
 #endif
 
+#ifdef CONFIG_CPU_MICROMIPS
+#define STORSIZE (LONGSIZE * 2)
+#define STORMASK (STORSIZE - 1)
+#else
+#define STORSIZE LONGSIZE
+#define STORMASK LONGMASK
+#endif
+
 #define EX(insn,reg,addr,handler)			\
 9:	insn	reg, addr;				\
 	.section __ex_table,"a"; 			\
@@ -26,23 +35,36 @@
 	.previous
 
 	.macro	f_fill64 dst, offset, val, fixup
-	EX(LONG_S, \val, (\offset +  0 * LONGSIZE)(\dst), \fixup)
-	EX(LONG_S, \val, (\offset +  1 * LONGSIZE)(\dst), \fixup)
-	EX(LONG_S, \val, (\offset +  2 * LONGSIZE)(\dst), \fixup)
-	EX(LONG_S, \val, (\offset +  3 * LONGSIZE)(\dst), \fixup)
-	EX(LONG_S, \val, (\offset +  4 * LONGSIZE)(\dst), \fixup)
-	EX(LONG_S, \val, (\offset +  5 * LONGSIZE)(\dst), \fixup)
-	EX(LONG_S, \val, (\offset +  6 * LONGSIZE)(\dst), \fixup)
-	EX(LONG_S, \val, (\offset +  7 * LONGSIZE)(\dst), \fixup)
+#ifdef CONFIG_CPU_MICROMIPS
+	EX(swp, t8, (\offset + 0 * STORSIZE)(\dst), \fixup)
+	EX(swp, t8, (\offset + 1 * STORSIZE)(\dst), \fixup)
+	EX(swp, t8, (\offset + 2 * STORSIZE)(\dst), \fixup)
+	EX(swp, t8, (\offset + 3 * STORSIZE)(\dst), \fixup)
+#if LONGSIZE == 4
+	EX(swp, t8, (\offset + 4 * STORSIZE)(\dst), \fixup)
+	EX(swp, t8, (\offset + 5 * STORSIZE)(\dst), \fixup)
+	EX(swp, t8, (\offset + 6 * STORSIZE)(\dst), \fixup)
+	EX(swp, t8, (\offset + 7 * STORSIZE)(\dst), \fixup)
+#endif
+#else
+	EX(LONG_S, \val, (\offset +  0 * STORSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  1 * STORSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  2 * STORSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  3 * STORSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  4 * STORSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  5 * STORSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  6 * STORSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  7 * STORSIZE)(\dst), \fixup)
 #if LONGSIZE == 4
-	EX(LONG_S, \val, (\offset +  8 * LONGSIZE)(\dst), \fixup)
-	EX(LONG_S, \val, (\offset +  9 * LONGSIZE)(\dst), \fixup)
-	EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup)
-	EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup)
-	EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup)
-	EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup)
-	EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup)
-	EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  8 * STORSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  9 * STORSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 10 * STORSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 11 * STORSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 12 * STORSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 13 * STORSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 14 * STORSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 15 * STORSIZE)(\dst), \fixup)
+#endif
 #endif
 	.endm
 
@@ -71,16 +93,20 @@ LEAF(memset)
 1:
 
 FEXPORT(__bzero)
-	sltiu		t0, a2, LONGSIZE	/* very small region? */
+	sltiu		t0, a2, STORSIZE	/* very small region? */
 	bnez		t0, .Lsmall_memset
-	 andi		t0, a0, LONGMASK	/* aligned? */
+	 andi		t0, a0, STORMASK	/* aligned? */
 
+#ifdef CONFIG_CPU_MICROMIPS
+	move		t8, a1
+	move		t9, a1
+#endif
 #ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	beqz		t0, 1f
-	 PTR_SUBU	t0, LONGSIZE		/* alignment in bytes */
+	 PTR_SUBU	t0, STORSIZE		/* alignment in bytes */
 #else
 	.set		noat
-	li		AT, LONGSIZE
+	li		AT, STORSIZE
 	beqz		t0, 1f
 	 PTR_SUBU	t0, AT			/* alignment in bytes */
 	.set		at
@@ -99,7 +125,7 @@ FEXPORT(__bzero)
 1:	ori		t1, a2, 0x3f		/* # of full blocks */
 	xori		t1, 0x3f
 	beqz		t1, .Lmemset_partial	/* no block to fill */
-	 andi		t0, a2, 0x40-LONGSIZE
+	 andi		t0, a2, 0x40-STORSIZE
 
 	PTR_ADDU	t1, a0			/* end address */
 	.set		reorder
@@ -112,14 +138,26 @@ FEXPORT(__bzero)
 .Lmemset_partial:
 	R10KCBARRIER(0(ra))
 	PTR_LA		t1, 2f			/* where to start */
+#ifdef CONFIG_CPU_MICROMIPS
+	LONG_SRL	t7, t0, 1
+#if LONGSIZE == 4
+	PTR_SUBU	t1, t7
+#else
+	.set		noat
+	LONG_SRL	AT, t7, 1
+	PTR_SUBU	t1, AT
+	.set		at
+#endif
+#else
 #if LONGSIZE == 4
 	PTR_SUBU	t1, t0
 #else
 	.set		noat
-	LONG_SRL		AT, t0, 1
+	LONG_SRL	AT, t0, 1
 	PTR_SUBU	t1, AT
 	.set		at
 #endif
+#endif
 	jr		t1
 	 PTR_ADDU	a0, t0			/* dest ptr */
 
@@ -128,7 +166,7 @@ FEXPORT(__bzero)
 	.set		nomacro
 	f_fill64 a0, -64, a1, .Lpartial_fixup	/* ... but first do longs ... */
 2:	.set		pop
-	andi		a2, LONGMASK		/* At most one long to go */
+	andi		a2, STORMASK		/* At most one long to go */
 
 	beqz		a2, 1f
 	 PTR_ADDU	a0, a2			/* What's left */
@@ -169,7 +207,7 @@ FEXPORT(__bzero)
 
 .Lpartial_fixup:
 	PTR_L		t0, TI_TASK($28)
-	andi		a2, LONGMASK
+	andi		a2, STORMASK
 	LONG_L		t0, THREAD_BUADDR(t0)
 	LONG_ADDU	a2, t1
 	jr		ra
@@ -177,4 +215,4 @@ FEXPORT(__bzero)
 
 .Llast_fixup:
 	jr		ra
-	 andi		v1, a2, LONGMASK
+	 andi		v1, a2, STORMASK
diff --git a/arch/mips/lib/strlen_user.S b/arch/mips/lib/strlen_user.S
index fdbb970..60fa23b 100644
--- a/arch/mips/lib/strlen_user.S
+++ b/arch/mips/lib/strlen_user.S
@@ -3,8 +3,9 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 1996, 1998, 1999, 2004 by Ralf Baechle
- * Copyright (c) 1999 Silicon Graphics, Inc.
+ * Copyright (C) 1996, 1998, 1999, 2004 by Ralf Baechle
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) 2011 MIPS Technologies, Inc.
  */
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
@@ -28,9 +29,13 @@ LEAF(__strlen_user_asm)
 
 FEXPORT(__strlen_user_nocheck_asm)
 	move		v0, a0
-1:	EX(lb, t0, (v0), .Lfault)
+#ifdef CONFIG_CPU_MICROMIPS
+1:	EX(lbu16, v1, (v0), .Lfault)
+#else
+1:	EX(lb, v1, (v0), .Lfault)
+#endif
 	PTR_ADDIU	v0, 1
-	bnez		t0, 1b
+	bnez		v1, 1b
 	PTR_SUBU	v0, a0
 	jr		ra
 	END(__strlen_user_asm)
diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S
index 7201b2f..bcbb9a0 100644
--- a/arch/mips/lib/strncpy_user.S
+++ b/arch/mips/lib/strncpy_user.S
@@ -3,7 +3,8 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 1996, 1999 by Ralf Baechle
+ * Copyright (C) 1996, 1999 by Ralf Baechle
+ * Copyright (C) 2011 MIPS Technologies, Inc.
  */
 #include <linux/errno.h>
 #include <asm/asm.h>
@@ -30,30 +31,32 @@
 LEAF(__strncpy_from_user_asm)
 	LONG_L		v0, TI_ADDR_LIMIT($28)	# pointer ok?
 	and		v0, a1
+#ifdef CONFIG_CPU_MICROMIPS
+	bnezc		v0, .Lfault
+#else
 	bnez		v0, .Lfault
+#endif
 
 FEXPORT(__strncpy_from_user_nocheck_asm)
-	move		v0, zero
-	move		v1, a1
 	.set		noreorder
-1:	EX(lbu, t0, (v1), .Lfault)
+	move		t0, zero
+	move		v1, a1
+1:	EX(lbu, v0, (v1), .Lfault)
 	PTR_ADDIU	v1, 1
 	R10KCBARRIER(0(ra))
-	beqz		t0, 2f
-	 sb		t0, (a0)
-	PTR_ADDIU	v0, 1
-	.set		reorder
-	PTR_ADDIU	a0, 1
-	bne		v0, a2, 1b
-2:	PTR_ADDU	t0, a1, v0
-	xor		t0, a1
-	bltz		t0, .Lfault
+	beqz		v0, 2f
+	 sb		v0, (a0)
+	PTR_ADDIU	t0, 1
+	bne		t0, a2, 1b
+	 PTR_ADDIU	a0, 1
+2:	PTR_ADDU	v0, a1, t0
+	xor		v0, a1
+	bltz		v0, .Lfault
+	 nop
 	jr		ra			# return n
+	move		v0, t0
 	END(__strncpy_from_user_asm)
 
-.Lfault:	li		v0, -EFAULT
+.Lfault:
 	jr		ra
-
-	.section	__ex_table,"a"
-	PTR		1b, .Lfault
-	.previous
+	 li		v0, -EFAULT
diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S
index 6445716..9090ced 100644
--- a/arch/mips/lib/strnlen_user.S
+++ b/arch/mips/lib/strnlen_user.S
@@ -5,6 +5,7 @@
  *
  * Copyright (c) 1996, 1998, 1999, 2004 by Ralf Baechle
  * Copyright (c) 1999 Silicon Graphics, Inc.
+ * Copyright (C) 2011 MIPS Technologies, Inc.
  */
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
@@ -26,21 +27,34 @@
  *       the maximum is a tad hairier ...
  */
 LEAF(__strnlen_user_asm)
+	.set	noreorder
 	LONG_L		v0, TI_ADDR_LIMIT($28)	# pointer ok?
 	and		v0, a0
+#ifdef CONFIG_CPU_MICROMIPS
+	bnezc		v0, .Lfault
+#else
 	bnez		v0, .Lfault
+#endif
 
 FEXPORT(__strnlen_user_nocheck_asm)
-	move		v0, a0
 	PTR_ADDU	a1, a0			# stop pointer
+	move		v0, a0
 1:	beq		v0, a1, 1f		# limit reached?
+	 nop
 	EX(lb, t0, (v0), .Lfault)
-	PTR_ADDU	v0, 1
+#ifdef CONFIG_CPU_MICROMIPS
+	addius5		v0, 1
+	bnezc		t0, 1b
+1:	jr		ra
+	PTR_SUBU	v0, a0
+#else
 	bnez		t0, 1b
-1:	PTR_SUBU	v0, a0
-	jr		ra
+	PTR_ADDU	v0, 1
+1:      jr              ra
+	PTR_SUBU        v0, a0
+#endif
 	END(__strnlen_user_asm)
 
 .Lfault:
-	move		v0, zero
 	jr		ra
+	move		v0, zero
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index cc0b626..be71d38 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -6,6 +6,7 @@
  * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@xxxxxxxxxxxxxx)
  * Copyright (C) 2007  Maciej W. Rozycki
  * Copyright (C) 2008  Thiemo Seufer
+ * Copyright (C) 2011  MIPS Technologies, Inc.
  */
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -79,17 +80,12 @@ static struct uasm_reloc __cpuinitdata relocs[5];
  * R4600 v2.0:				0x060 bytes
  * With prefetching, 16 word strides	0x120 bytes
  */
-
-static u32 clear_page_array[0x120 / 4];
+u32 clear_page_array[CLEAR_PAGE_ARRAY_SIZE / 4];
 
 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS
 void clear_page_cpu(void *page) __attribute__((alias("clear_page_array")));
-#else
-void clear_page(void *page) __attribute__((alias("clear_page_array")));
 #endif
 
-EXPORT_SYMBOL(clear_page);
-
 /*
  * Maximum sizes:
  *
@@ -98,17 +94,13 @@ EXPORT_SYMBOL(clear_page);
  * R4600 v2.0:				0x07c bytes
  * With prefetching, 16 word strides	0x540 bytes
  */
-static u32 copy_page_array[0x540 / 4];
+u32 copy_page_array[COPY_PAGE_ARRAY_SIZE / 4];
 
 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS
 void
 copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array")));
-#else
-void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
 #endif
 
-EXPORT_SYMBOL(copy_page);
-
 
 static int pref_bias_clear_store __cpuinitdata;
 static int pref_bias_copy_load __cpuinitdata;
@@ -368,6 +360,12 @@ void __cpuinit build_clear_page(void)
 	for (i = 0; i < (buf - clear_page_array); i++)
 		pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
 	pr_debug("\t.set pop\n");
+#ifdef CONFIG_CPU_MICROMIPS
+	memcpy(((u8 *)clear_page) - 1, clear_page_array,
+		ARRAY_SIZE(clear_page_array) * 4);
+#else
+	memcpy(clear_page, clear_page_array, ARRAY_SIZE(clear_page_array) * 4);
+#endif
 }
 
 static void __cpuinit build_copy_load(u32 **buf, int reg, int off)
@@ -607,6 +605,12 @@ void __cpuinit build_copy_page(void)
 	for (i = 0; i < (buf - copy_page_array); i++)
 		pr_debug("\t.word 0x%08x\n", copy_page_array[i]);
 	pr_debug("\t.set pop\n");
+#ifdef CONFIG_CPU_MICROMIPS
+	memcpy(((u8 *)copy_page) - 1, copy_page_array,
+		ARRAY_SIZE(copy_page_array) * 4);
+#else
+	memcpy(copy_page, copy_page_array, ARRAY_SIZE(copy_page_array) * 4);
+#endif
 }
 
 #ifdef CONFIG_SIBYTE_DMA_PAGEOPS
-- 
1.7.9.6




[Linux MIPS Home]     [LKML Archive]     [Linux ARM]     [Linux]     [Git]     [Photo]     [Yosemite News]     [Linux SCSI]     [Linux Hams]

Add to Google Powered by Linux