From 8e3d8433d8c22ca6c42cba4a67d300c39aae7822 Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Tue, 14 Nov 2006 21:18:18 -0800
Subject: [NET]: MIPS checksum annotations and cleanups.

* sanitize prototypes, annotate
* kill shift-by-16 in checksum calculations
* htons->shift in l-e checksum calculations

Signed-off-by: Al Viro
Signed-off-by: David S. Miller
---
 arch/mips/lib/csum_partial_copy.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/mips/lib')

diff --git a/arch/mips/lib/csum_partial_copy.c b/arch/mips/lib/csum_partial_copy.c
index 6e9f366f961..1720f2ceeea 100644
--- a/arch/mips/lib/csum_partial_copy.c
+++ b/arch/mips/lib/csum_partial_copy.c
@@ -16,8 +16,8 @@
 /*
  * copy while checksumming, otherwise like csum_partial
  */
-unsigned int csum_partial_copy_nocheck(const unsigned char *src,
-	unsigned char *dst, int len, unsigned int sum)
+__wsum csum_partial_copy_nocheck(const void *src,
+	void *dst, int len, __wsum sum)
 {
 	/*
 	 * It's 2:30 am and I don't feel like doing it real ...
@@ -33,8 +33,8 @@ unsigned int csum_partial_copy_nocheck(const unsigned char *src,
  * Copy from userspace and compute checksum.  If we catch an exception
  * then zero the rest of the buffer.
  */
-unsigned int csum_partial_copy_from_user (const unsigned char __user *src,
-	unsigned char *dst, int len, unsigned int sum, int *err_ptr)
+__wsum csum_partial_copy_from_user (const void __user *src,
+	void *dst, int len, __wsum sum, int *err_ptr)
 {
 	int missing;
--
cgit v1.2.3
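A note on what this first patch buys: __wsum and __sum16 are sparse-annotated integer types, so after this annotation pass, code that mixes a partial checksum with an ordinary integer draws a typecheck warning while the generated code stays identical. For the arithmetic behind the "fold" these routines ultimately feed, here is a minimal userspace sketch of folding a 32-bit partial sum to 16 bits; plain uint32_t/uint16_t stand in for the kernel's annotated types, and the function name is mine, not the kernel's:

    #include <stdint.h>

    /* Fold a 32-bit one's-complement partial sum to 16 bits. */
    static uint16_t fold_csum(uint32_t sum)
    {
            sum = (sum & 0xffff) + (sum >> 16);   /* at most 0x1fffe now */
            sum = (sum & 0xffff) + (sum >> 16);   /* always fits in 16 bits */
            return (uint16_t)~sum;                /* final complement */
    }

Masking and adding twice is enough: after the first step the value is at most 0x1fffe, so the second step cannot carry again.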
From 0bcdda0f3a87ed684d46841b6069409e39c4af65 Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto
Date: Mon, 4 Dec 2006 00:42:59 +0900
Subject: [MIPS] Unify csum_partial.S

The 32-bit version and 64-bit version are almost equal.  Unify them.
This makes further improvements (for example, copying with parallel,
supporting PREFETCH, etc.) easier.

Signed-off-by: Atsushi Nemoto
Signed-off-by: Ralf Baechle
---
 arch/mips/lib/Makefile       |   4 +-
 arch/mips/lib/csum_partial.S | 258 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 260 insertions(+), 2 deletions(-)
 create mode 100644 arch/mips/lib/csum_partial.S

(limited to 'arch/mips/lib')

diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index b225543f530..888b61ea12f 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -2,8 +2,8 @@
 # Makefile for MIPS-specific library files..
 #
 
-lib-y	+= csum_partial_copy.o memcpy.o promlib.o strlen_user.o strncpy_user.o \
-	   strnlen_user.o uncached.o
+lib-y	+= csum_partial.o csum_partial_copy.o memcpy.o promlib.o \
+	   strlen_user.o strncpy_user.o strnlen_user.o uncached.o
 
 obj-y	+= iomap.o
 
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
new file mode 100644
index 00000000000..15611d9df7a
--- /dev/null
+++ b/arch/mips/lib/csum_partial.S
@@ -0,0 +1,258 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Quick'n'dirty IP checksum ...
+ *
+ * Copyright (C) 1998, 1999 Ralf Baechle
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ */
+#include <asm/asm.h>
+#include <asm/regdef.h>
+
+#ifdef CONFIG_64BIT
+#define T0	ta0
+#define T1	ta1
+#define T2	ta2
+#define T3	ta3
+#define T4	t0
+#define T7	t3
+#else
+#define T0	t0
+#define T1	t1
+#define T2	t2
+#define T3	t3
+#define T4	t4
+#define T7	t7
+#endif
+
+#define ADDC(sum,reg)				\
+	addu	sum, reg;			\
+	sltu	v1, sum, reg;			\
+	addu	sum, v1
+
+#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
+	lw	_t0, (offset + 0x00)(src);	\
+	lw	_t1, (offset + 0x04)(src);	\
+	lw	_t2, (offset + 0x08)(src);	\
+	lw	_t3, (offset + 0x0c)(src);	\
+	ADDC(sum, _t0);				\
+	ADDC(sum, _t1);				\
+	ADDC(sum, _t2);				\
+	ADDC(sum, _t3);				\
+	lw	_t0, (offset + 0x10)(src);	\
+	lw	_t1, (offset + 0x14)(src);	\
+	lw	_t2, (offset + 0x18)(src);	\
+	lw	_t3, (offset + 0x1c)(src);	\
+	ADDC(sum, _t0);				\
+	ADDC(sum, _t1);				\
+	ADDC(sum, _t2);				\
+	ADDC(sum, _t3);				\
+
+/*
+ * a0: source address
+ * a1: length of the area to checksum
+ * a2: partial checksum
+ */
+
+#define src a0
+#define sum v0
+
+	.text
+	.set	noreorder
+
+/* unknown src alignment and < 8 bytes to go */
+small_csumcpy:
+	move	a1, T2
+
+	andi	T0, a1, 4
+	beqz	T0, 1f
+	andi	T0, a1, 2
+
+	/* Still a full word to go */
+	ulw	T1, (src)
+	PTR_ADDIU	src, 4
+	ADDC(sum, T1)
+
+1:	move	T1, zero
+	beqz	T0, 1f
+	andi	T0, a1, 1
+
+	/* Still a halfword to go */
+	ulhu	T1, (src)
+	PTR_ADDIU	src, 2
+
+1:	beqz	T0, 1f
+	sll	T1, T1, 16
+
+	lbu	T2, (src)
+	nop
+
+#ifdef __MIPSEB__
+	sll	T2, T2, 8
+#endif
+	or	T1, T2
+
+1:	ADDC(sum, T1)
+
+	/* fold checksum */
+	sll	v1, sum, 16
+	addu	sum, v1
+	sltu	v1, sum, v1
+	srl	sum, sum, 16
+	addu	sum, v1
+
+	/* odd buffer alignment? */
+	beqz	T7, 1f
+	nop
+	sll	v1, sum, 8
+	srl	sum, sum, 8
+	or	sum, v1
+	andi	sum, 0xffff
+1:
+	.set	reorder
+	/* Add the passed partial csum. */
+	ADDC(sum, a2)
+	jr	ra
+	.set	noreorder
+
+/* ------------------------------------------------------------------------- */
+
+	.align	5
+LEAF(csum_partial)
+	move	sum, zero
+	move	T7, zero
+
+	sltiu	t8, a1, 0x8
+	bnez	t8, small_csumcpy	/* < 8 bytes to copy */
+	move	T2, a1
+
+	beqz	a1, out
+	andi	T7, src, 0x1		/* odd buffer? */
+
+hword_align:
+	beqz	T7, word_align
+	andi	t8, src, 0x2
+
+	lbu	T0, (src)
+	LONG_SUBU	a1, a1, 0x1
+#ifdef __MIPSEL__
+	sll	T0, T0, 8
+#endif
+	ADDC(sum, T0)
+	PTR_ADDU	src, src, 0x1
+	andi	t8, src, 0x2
+
+word_align:
+	beqz	t8, dword_align
+	sltiu	t8, a1, 56
+
+	lhu	T0, (src)
+	LONG_SUBU	a1, a1, 0x2
+	ADDC(sum, T0)
+	sltiu	t8, a1, 56
+	PTR_ADDU	src, src, 0x2
+
+dword_align:
+	bnez	t8, do_end_words
+	move	t8, a1
+
+	andi	t8, src, 0x4
+	beqz	t8, qword_align
+	andi	t8, src, 0x8
+
+	lw	T0, 0x00(src)
+	LONG_SUBU	a1, a1, 0x4
+	ADDC(sum, T0)
+	PTR_ADDU	src, src, 0x4
+	andi	t8, src, 0x8
+
+qword_align:
+	beqz	t8, oword_align
+	andi	t8, src, 0x10
+
+	lw	T0, 0x00(src)
+	lw	T1, 0x04(src)
+	LONG_SUBU	a1, a1, 0x8
+	ADDC(sum, T0)
+	ADDC(sum, T1)
+	PTR_ADDU	src, src, 0x8
+	andi	t8, src, 0x10
+
+oword_align:
+	beqz	t8, begin_movement
+	LONG_SRL	t8, a1, 0x7
+
+	lw	T3, 0x08(src)
+	lw	T4, 0x0c(src)
+	lw	T0, 0x00(src)
+	lw	T1, 0x04(src)
+	ADDC(sum, T3)
+	ADDC(sum, T4)
+	ADDC(sum, T0)
+	ADDC(sum, T1)
+	LONG_SUBU	a1, a1, 0x10
+	PTR_ADDU	src, src, 0x10
+	LONG_SRL	t8, a1, 0x7
+
+begin_movement:
+	beqz	t8, 1f
+	andi	T2, a1, 0x40
+
+move_128bytes:
+	CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4)
+	CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4)
+	CSUM_BIGCHUNK(src, 0x40, sum, T0, T1, T3, T4)
+	CSUM_BIGCHUNK(src, 0x60, sum, T0, T1, T3, T4)
+	LONG_SUBU	t8, t8, 0x01
+	bnez	t8, move_128bytes
+	PTR_ADDU	src, src, 0x80
+
+1:
+	beqz	T2, 1f
+	andi	T2, a1, 0x20
+
+move_64bytes:
+	CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4)
+	CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4)
+	PTR_ADDU	src, src, 0x40
+
+1:
+	beqz	T2, do_end_words
+	andi	t8, a1, 0x1c
+
+move_32bytes:
+	CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4)
+	andi	t8, a1, 0x1c
+	PTR_ADDU	src, src, 0x20
+
+do_end_words:
+	beqz	t8, maybe_end_cruft
+	LONG_SRL	t8, t8, 0x2
+
+end_words:
+	lw	T0, (src)
+	LONG_SUBU	t8, t8, 0x1
+	ADDC(sum, T0)
+	bnez	t8, end_words
+	PTR_ADDU	src, src, 0x4
+
+maybe_end_cruft:
+	andi	T2, a1, 0x3
+
+small_memcpy:
+	j	small_csumcpy; move a1, T2	/* XXX ??? */
+	beqz	t2, out
+	move	a1, T2
+
+end_bytes:
+	lb	T0, (src)
+	LONG_SUBU	a1, a1, 0x1
+	bnez	a2, end_bytes
+	PTR_ADDU	src, src, 0x1
+
+out:
+	jr	ra
+	move	v0, sum
+END(csum_partial)
--
cgit v1.2.3
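The core of the new assembly is the ADDC macro: addu adds a word into the running sum, and sltu materializes the carry out of bit 31, which is then added back in. That is one's-complement addition, the operation the IP checksum is defined over. A C model of the same idea, under hypothetical names of my choosing (addc, csum_words):

    #include <stddef.h>
    #include <stdint.h>

    /* Model of ADDC: add, then wrap the carry back in (the sltu in the asm). */
    static inline uint32_t addc(uint32_t sum, uint32_t v)
    {
            sum += v;
            return sum + (sum < v);       /* end-around carry */
    }

    /* What CSUM_BIGCHUNK does eight words at a time, shown one word at a time. */
    static uint32_t csum_words(const uint32_t *p, size_t nwords, uint32_t sum)
    {
            while (nwords--)
                    sum = addc(sum, *p++);
            return sum;
    }

The unrolled CSUM_BIGCHUNK body exists purely to keep the load/add pipeline busy; the arithmetic is exactly this loop.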
From 52ffe760ea9ec407292d093c3f06c1cda5187228 Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto
Date: Fri, 8 Dec 2006 01:04:31 +0900
Subject: [MIPS] Make csum_partial more readable

Use standard o32 register name instead of T0, T1, etc, like memcpy.S.

Signed-off-by: Atsushi Nemoto
Signed-off-by: Ralf Baechle
---
 arch/mips/lib/csum_partial.S | 144 ++++++++++++++++++++++---------------------
 1 file changed, 74 insertions(+), 70 deletions(-)

(limited to 'arch/mips/lib')

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 15611d9df7a..3bffdbb1c1f 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -12,19 +12,23 @@
 #include <asm/regdef.h>
 
 #ifdef CONFIG_64BIT
-#define T0	ta0
-#define T1	ta1
-#define T2	ta2
-#define T3	ta3
-#define T4	t0
-#define T7	t3
-#else
-#define T0	t0
-#define T1	t1
-#define T2	t2
-#define T3	t3
-#define T4	t4
-#define T7	t7
+/*
+ * As we are sharing code base with the mips32 tree (which use the o32 ABI
+ * register definitions). We need to redefine the register definitions from
+ * the n64 ABI register naming to the o32 ABI register naming.
+ */
+#undef t0
+#undef t1
+#undef t2
+#undef t3
+#define t0	$8
+#define t1	$9
+#define t2	$10
+#define t3	$11
+#define t4	$12
+#define t5	$13
+#define t6	$14
+#define t7	$15
 #endif
 
@@ -64,37 +68,37 @@
 
 /* unknown src alignment and < 8 bytes to go */
 small_csumcpy:
-	move	a1, T2
+	move	a1, t2
 
-	andi	T0, a1, 4
-	beqz	T0, 1f
-	andi	T0, a1, 2
+	andi	t0, a1, 4
+	beqz	t0, 1f
+	andi	t0, a1, 2
 
 	/* Still a full word to go */
-	ulw	T1, (src)
+	ulw	t1, (src)
 	PTR_ADDIU	src, 4
-	ADDC(sum, T1)
+	ADDC(sum, t1)
 
-1:	move	T1, zero
-	beqz	T0, 1f
-	andi	T0, a1, 1
+1:	move	t1, zero
+	beqz	t0, 1f
+	andi	t0, a1, 1
 
 	/* Still a halfword to go */
-	ulhu	T1, (src)
+	ulhu	t1, (src)
 	PTR_ADDIU	src, 2
 
-1:	beqz	T0, 1f
-	sll	T1, T1, 16
+1:	beqz	t0, 1f
+	sll	t1, t1, 16
 
-	lbu	T2, (src)
+	lbu	t2, (src)
 	nop
 
 #ifdef __MIPSEB__
-	sll	T2, T2, 8
+	sll	t2, t2, 8
 #endif
-	or	T1, T2
+	or	t1, t2
 
-1:	ADDC(sum, T1)
+1:	ADDC(sum, t1)
 
 	/* fold checksum */
 	sll	v1, sum, 16
@@ -104,7 +108,7 @@ small_csumcpy:
 	addu	sum, v1
 
 	/* odd buffer alignment? */
-	beqz	T7, 1f
+	beqz	t7, 1f
 	nop
 	sll	v1, sum, 8
 	srl	sum, sum, 8
@@ -122,25 +126,25 @@ small_csumcpy:
 	.align	5
 LEAF(csum_partial)
 	move	sum, zero
-	move	T7, zero
+	move	t7, zero
 
 	sltiu	t8, a1, 0x8
 	bnez	t8, small_csumcpy	/* < 8 bytes to copy */
-	move	T2, a1
+	move	t2, a1
 
 	beqz	a1, out
-	andi	T7, src, 0x1		/* odd buffer? */
+	andi	t7, src, 0x1		/* odd buffer? */
 
 hword_align:
-	beqz	T7, word_align
+	beqz	t7, word_align
 	andi	t8, src, 0x2
 
-	lbu	T0, (src)
+	lbu	t0, (src)
 	LONG_SUBU	a1, a1, 0x1
 #ifdef __MIPSEL__
-	sll	T0, T0, 8
+	sll	t0, t0, 8
 #endif
-	ADDC(sum, T0)
+	ADDC(sum, t0)
 	PTR_ADDU	src, src, 0x1
 	andi	t8, src, 0x2
 
@@ -148,9 +152,9 @@ word_align:
 	beqz	t8, dword_align
 	sltiu	t8, a1, 56
 
-	lhu	T0, (src)
+	lhu	t0, (src)
 	LONG_SUBU	a1, a1, 0x2
-	ADDC(sum, T0)
+	ADDC(sum, t0)
 	sltiu	t8, a1, 56
 	PTR_ADDU	src, src, 0x2
 
@@ -162,9 +166,9 @@ dword_align:
 	beqz	t8, qword_align
 	andi	t8, src, 0x8
 
-	lw	T0, 0x00(src)
+	lw	t0, 0x00(src)
 	LONG_SUBU	a1, a1, 0x4
-	ADDC(sum, T0)
+	ADDC(sum, t0)
 	PTR_ADDU	src, src, 0x4
 	andi	t8, src, 0x8
 
@@ -172,11 +176,11 @@ qword_align:
 	beqz	t8, oword_align
 	andi	t8, src, 0x10
 
-	lw	T0, 0x00(src)
-	lw	T1, 0x04(src)
+	lw	t0, 0x00(src)
+	lw	t1, 0x04(src)
 	LONG_SUBU	a1, a1, 0x8
-	ADDC(sum, T0)
-	ADDC(sum, T1)
+	ADDC(sum, t0)
+	ADDC(sum, t1)
 	PTR_ADDU	src, src, 0x8
 	andi	t8, src, 0x10
 
@@ -184,46 +188,46 @@ oword_align:
 	beqz	t8, begin_movement
 	LONG_SRL	t8, a1, 0x7
 
-	lw	T3, 0x08(src)
-	lw	T4, 0x0c(src)
-	lw	T0, 0x00(src)
-	lw	T1, 0x04(src)
-	ADDC(sum, T3)
-	ADDC(sum, T4)
-	ADDC(sum, T0)
-	ADDC(sum, T1)
+	lw	t3, 0x08(src)
+	lw	t4, 0x0c(src)
+	lw	t0, 0x00(src)
+	lw	t1, 0x04(src)
+	ADDC(sum, t3)
+	ADDC(sum, t4)
+	ADDC(sum, t0)
+	ADDC(sum, t1)
 	LONG_SUBU	a1, a1, 0x10
 	PTR_ADDU	src, src, 0x10
 	LONG_SRL	t8, a1, 0x7
 
 begin_movement:
 	beqz	t8, 1f
-	andi	T2, a1, 0x40
+	andi	t2, a1, 0x40
 
 move_128bytes:
-	CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4)
-	CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4)
-	CSUM_BIGCHUNK(src, 0x40, sum, T0, T1, T3, T4)
-	CSUM_BIGCHUNK(src, 0x60, sum, T0, T1, T3, T4)
+	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
+	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
+	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
+	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
 	LONG_SUBU	t8, t8, 0x01
 	bnez	t8, move_128bytes
 	PTR_ADDU	src, src, 0x80
 
 1:
-	beqz	T2, 1f
-	andi	T2, a1, 0x20
+	beqz	t2, 1f
+	andi	t2, a1, 0x20
 
 move_64bytes:
-	CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4)
-	CSUM_BIGCHUNK(src, 0x20, sum, T0, T1, T3, T4)
+	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
+	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
 	PTR_ADDU	src, src, 0x40
 
 1:
-	beqz	T2, do_end_words
+	beqz	t2, do_end_words
 	andi	t8, a1, 0x1c
 
 move_32bytes:
-	CSUM_BIGCHUNK(src, 0x00, sum, T0, T1, T3, T4)
+	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 	andi	t8, a1, 0x1c
 	PTR_ADDU	src, src, 0x20
 
@@ -232,22 +236,22 @@ do_end_words:
 	LONG_SRL	t8, t8, 0x2
 
 end_words:
-	lw	T0, (src)
+	lw	t0, (src)
 	LONG_SUBU	t8, t8, 0x1
-	ADDC(sum, T0)
+	ADDC(sum, t0)
 	bnez	t8, end_words
 	PTR_ADDU	src, src, 0x4
 
 maybe_end_cruft:
-	andi	T2, a1, 0x3
+	andi	t2, a1, 0x3
 
 small_memcpy:
-	j	small_csumcpy; move a1, T2	/* XXX ??? */
+	j	small_csumcpy; move a1, t2	/* XXX ??? */
 	beqz	t2, out
-	move	a1, T2
+	move	a1, t2
 
 end_bytes:
-	lb	T0, (src)
+	lb	t0, (src)
 	LONG_SUBU	a1, a1, 0x1
 	bnez	a2, end_bytes
 	PTR_ADDU	src, src, 0x1
--
cgit v1.2.3
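Background for the #undef block above: under o32, registers $8..$15 carry the names t0..t7, while under n64, $8..$11 are extra argument registers (a4..a7, aliased ta0..ta3) and only $12..$15 are t0..t3. Pinning t0..t7 straight to $8..$15 lets one source file use o32 names on both ABIs; all eight stay caller-saved under both conventions, so no save/restore obligations change. The t7 odd-buffer flag this rename touches guards a handy property of one's-complement sums: checksumming a buffer one byte out of phase yields the byte-swapped result, so an odd starting address only costs the rotate seen in small_csumcpy. In C (an illustration, my naming):

    #include <stdint.h>

    /* Undo the byte-lane swap after summing from an odd start address:
     * the asm's "sll 8 / srl 8 / or / andi 0xffff" sequence. */
    static uint16_t csum_swap(uint16_t sum)
    {
            return (uint16_t)((sum << 8) | (sum >> 8));
    }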
From 773ff78838ca3c07245e45c06235e0baaa5f710a Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto
Date: Fri, 8 Dec 2006 01:04:45 +0900
Subject: [MIPS] Optimize flow of csum_partial

Delete dead codes at end of the function and move small_csumcopy
there.  This makes some labels (maybe_end_cruft, small_memcpy,
end_bytes, out) needless and eliminates some branches.

Signed-off-by: Atsushi Nemoto
Signed-off-by: Ralf Baechle
---
 arch/mips/lib/csum_partial.S | 129 ++++++++++++++++++-------------------------
 1 file changed, 54 insertions(+), 75 deletions(-)

(limited to 'arch/mips/lib')

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 3bffdbb1c1f..b04475d76f3 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -65,64 +65,6 @@
 	.text
 	.set	noreorder
 
-
-/* unknown src alignment and < 8 bytes to go */
-small_csumcpy:
-	move	a1, t2
-
-	andi	t0, a1, 4
-	beqz	t0, 1f
-	andi	t0, a1, 2
-
-	/* Still a full word to go */
-	ulw	t1, (src)
-	PTR_ADDIU	src, 4
-	ADDC(sum, t1)
-
-1:	move	t1, zero
-	beqz	t0, 1f
-	andi	t0, a1, 1
-
-	/* Still a halfword to go */
-	ulhu	t1, (src)
-	PTR_ADDIU	src, 2
-
-1:	beqz	t0, 1f
-	sll	t1, t1, 16
-
-	lbu	t2, (src)
-	nop
-
-#ifdef __MIPSEB__
-	sll	t2, t2, 8
-#endif
-	or	t1, t2
-
-1:	ADDC(sum, t1)
-
-	/* fold checksum */
-	sll	v1, sum, 16
-	addu	sum, v1
-	sltu	v1, sum, v1
-	srl	sum, sum, 16
-	addu	sum, v1
-
-	/* odd buffer alignment? */
-	beqz	t7, 1f
-	nop
-	sll	v1, sum, 8
-	srl	sum, sum, 8
-	or	sum, v1
-	andi	sum, 0xffff
-1:
-	.set	reorder
-	/* Add the passed partial csum. */
-	ADDC(sum, a2)
-	jr	ra
-	.set	noreorder
-
-/* ------------------------------------------------------------------------- */
-
 	.align	5
 LEAF(csum_partial)
 	move	sum, zero
@@ -132,8 +74,7 @@ LEAF(csum_partial)
 	sltiu	t8, a1, 0x8
 	bnez	t8, small_csumcpy	/* < 8 bytes to copy */
 	move	t2, a1
 
-	beqz	a1, out
-	andi	t7, src, 0x1		/* odd buffer? */
+	andi	t7, src, 0x1		/* odd buffer? */
 
 hword_align:
@@ -232,8 +173,9 @@ move_32bytes:
 	PTR_ADDU	src, src, 0x20
 
 do_end_words:
-	beqz	t8, maybe_end_cruft
-	LONG_SRL	t8, t8, 0x2
+	beqz	t8, small_csumcpy
+	andi	t2, a1, 0x3
+	LONG_SRL	t8, t8, 0x2
 
 end_words:
 	lw	t0, (src)
@@ -242,21 +184,58 @@ end_words:
 	bnez	t8, end_words
 	PTR_ADDU	src, src, 0x4
 
-maybe_end_cruft:
-	andi	t2, a1, 0x3
+/* unknown src alignment and < 8 bytes to go */
+small_csumcpy:
+	move	a1, t2
 
-small_memcpy:
-	j	small_csumcpy; move a1, t2	/* XXX ??? */
-	beqz	t2, out
-	move	a1, t2
+	andi	t0, a1, 4
+	beqz	t0, 1f
+	andi	t0, a1, 2
 
-end_bytes:
-	lb	t0, (src)
-	LONG_SUBU	a1, a1, 0x1
-	bnez	a2, end_bytes
-	PTR_ADDU	src, src, 0x1
+	/* Still a full word to go */
+	ulw	t1, (src)
+	PTR_ADDIU	src, 4
+	ADDC(sum, t1)
+
+1:	move	t1, zero
+	beqz	t0, 1f
+	andi	t0, a1, 1
+
+	/* Still a halfword to go */
+	ulhu	t1, (src)
+	PTR_ADDIU	src, 2
+
+1:	beqz	t0, 1f
+	sll	t1, t1, 16
+
+	lbu	t2, (src)
+	nop
+
+#ifdef __MIPSEB__
+	sll	t2, t2, 8
+#endif
+	or	t1, t2
+
+1:	ADDC(sum, t1)
 
-out:
+	/* fold checksum */
+	sll	v1, sum, 16
+	addu	sum, v1
+	sltu	v1, sum, v1
+	srl	sum, sum, 16
+	addu	sum, v1
+
+	/* odd buffer alignment? */
+	beqz	t7, 1f
+	nop
+	sll	v1, sum, 8
+	srl	sum, sum, 8
+	or	sum, v1
+	andi	sum, 0xffff
+1:
+	.set	reorder
+	/* Add the passed partial csum. */
+	ADDC(sum, a2)
 	jr	ra
-	move	v0, sum
+	.set	noreorder
 END(csum_partial)
--
cgit v1.2.3
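The patch above is control-flow surgery rather than new arithmetic: small_csumcpy moves after end_words so the common case falls through into it, and the maybe_end_cruft, small_memcpy, end_bytes and out labels vanish along with the unreachable instructions after the unconditional jump (the old XXX lines). For reference, the tail logic that survives the move amounts to this in C; a rough sketch under my own names, with the halfword and byte handled as separate adds rather than packed into one register as the asm does (the folded result is the same):

    #include <stdint.h>
    #include <string.h>

    static inline uint32_t addc(uint32_t sum, uint32_t v)
    {
            sum += v;
            return sum + (sum < v);       /* end-around carry */
    }

    /* Mop up the final 0-7 bytes, mirroring small_csumcpy's word/half/byte steps. */
    static uint32_t csum_tail(const uint8_t *p, unsigned int len, uint32_t sum)
    {
            uint32_t w;
            uint16_t h;

            if (len & 4) {                        /* an unaligned word left */
                    memcpy(&w, p, sizeof(w));
                    sum = addc(sum, w);
                    p += 4;
            }
            if (len & 2) {                        /* an unaligned halfword left */
                    memcpy(&h, p, sizeof(h));
                    sum = addc(sum, h);
                    p += 2;
            }
            if (len & 1) {                        /* one stray byte left */
    #ifdef __MIPSEB__
                    sum = addc(sum, (uint32_t)*p << 8);   /* big endian: high lane */
    #else
                    sum = addc(sum, *p);                  /* little endian: low lane */
    #endif
            }
            return sum;
    }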
From ed99e2bc1dc5dc54eb5a019f4975562dbef20103 Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto
Date: Fri, 8 Dec 2006 01:04:51 +0900
Subject: [MIPS] Optimize csum_partial for 64bit kernel

Make csum_partial 64-bit powered.

Signed-off-by: Atsushi Nemoto
Signed-off-by: Ralf Baechle
---
 arch/mips/lib/csum_partial.S | 76 +++++++++++++++++++++++++++++++-------------
 1 file changed, 54 insertions(+), 22 deletions(-)

(limited to 'arch/mips/lib')

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index b04475d76f3..9db357294be 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -29,30 +29,49 @@
 #define t5	$13
 #define t6	$14
 #define t7	$15
+
+#define USE_DOUBLE
 #endif
 
+#ifdef USE_DOUBLE
+
+#define LOAD	ld
+#define ADD	daddu
+#define NBYTES	8
+
+#else
+
+#define LOAD	lw
+#define ADD	addu
+#define NBYTES	4
+
+#endif /* USE_DOUBLE */
+
+#define UNIT(unit)	((unit)*NBYTES)
+
 #define ADDC(sum,reg)				\
-	addu	sum, reg;			\
+	ADD	sum, reg;			\
 	sltu	v1, sum, reg;			\
-	addu	sum, v1
+	ADD	sum, v1
 
-#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
-	lw	_t0, (offset + 0x00)(src);	\
-	lw	_t1, (offset + 0x04)(src);	\
-	lw	_t2, (offset + 0x08)(src);	\
-	lw	_t3, (offset + 0x0c)(src);	\
-	ADDC(sum, _t0);				\
-	ADDC(sum, _t1);				\
-	ADDC(sum, _t2);				\
-	ADDC(sum, _t3);				\
-	lw	_t0, (offset + 0x10)(src);	\
-	lw	_t1, (offset + 0x14)(src);	\
-	lw	_t2, (offset + 0x18)(src);	\
-	lw	_t3, (offset + 0x1c)(src);	\
+#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
+	LOAD	_t0, (offset + UNIT(0))(src);	\
+	LOAD	_t1, (offset + UNIT(1))(src);	\
+	LOAD	_t2, (offset + UNIT(2))(src);	\
+	LOAD	_t3, (offset + UNIT(3))(src);	\
 	ADDC(sum, _t0);				\
 	ADDC(sum, _t1);				\
 	ADDC(sum, _t2);				\
-	ADDC(sum, _t3);				\
+	ADDC(sum, _t3)
+
+#ifdef USE_DOUBLE
+#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
+	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
+#else
+#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
+	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);	\
+	CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
+#endif
 
 /*
  * a0: source address
@@ -117,11 +136,17 @@ qword_align:
 	beqz	t8, oword_align
 	andi	t8, src, 0x10
 
+#ifdef USE_DOUBLE
+	ld	t0, 0x00(src)
+	LONG_SUBU	a1, a1, 0x8
+	ADDC(sum, t0)
+#else
 	lw	t0, 0x00(src)
 	lw	t1, 0x04(src)
 	LONG_SUBU	a1, a1, 0x8
 	ADDC(sum, t0)
 	ADDC(sum, t1)
+#endif
 	PTR_ADDU	src, src, 0x8
 	andi	t8, src, 0x10
 
@@ -129,14 +154,14 @@ oword_align:
 	beqz	t8, begin_movement
 	LONG_SRL	t8, a1, 0x7
 
-	lw	t3, 0x08(src)
-	lw	t4, 0x0c(src)
-	lw	t0, 0x00(src)
-	lw	t1, 0x04(src)
-	ADDC(sum, t3)
-	ADDC(sum, t4)
+#ifdef USE_DOUBLE
+	ld	t0, 0x00(src)
+	ld	t1, 0x08(src)
 	ADDC(sum, t0)
 	ADDC(sum, t1)
+#else
+	CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
+#endif
 	LONG_SUBU	a1, a1, 0x10
 	PTR_ADDU	src, src, 0x10
 	LONG_SRL	t8, a1, 0x7
@@ -219,6 +244,13 @@ small_csumcpy:
 1:	ADDC(sum, t1)
 
 	/* fold checksum */
+#ifdef USE_DOUBLE
+	dsll32	v1, sum, 0
+	daddu	sum, v1
+	sltu	v1, sum, v1
+	dsra32	sum, sum, 0
+	addu	sum, v1
+#endif
 	sll	v1, sum, 16
 	addu	sum, v1
 	sltu	v1, sum, v1
--
cgit v1.2.3
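With USE_DOUBLE in effect the accumulator lives in a 64-bit register, so before the existing 32-to-16 fold the sum must first be folded from 64 to 32 bits; that is what the new dsll32/daddu/sltu/dsra32/addu block performs. The same computation in portable C (a sketch, my naming):

    #include <stdint.h>

    /* Fold a 64-bit running sum to 32 bits, preserving the one's-complement value. */
    static uint32_t fold64(uint64_t sum)
    {
            sum = (sum & 0xffffffffull) + (sum >> 32);   /* at most 0x1fffffffe */
            sum = (sum & 0xffffffffull) + (sum >> 32);   /* always fits in 32 bits */
            return (uint32_t)sum;
    }

Summing 64 bits at a time halves the number of loads and carry fixups on a 64-bit kernel, which is the entire point of the USE_DOUBLE path.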
From ae32ffd65bbcc32795bb9b58ed12941efeb03dff Mon Sep 17 00:00:00 2001
From: Ralf Baechle
Date: Sun, 10 Dec 2006 15:05:11 +0000
Subject: [MIPS] Export csum_partial_copy_nocheck.

ibmtr.c and typhoon.c use it.

Signed-off-by: Ralf Baechle
---
 arch/mips/lib/csum_partial_copy.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/mips/lib')

diff --git a/arch/mips/lib/csum_partial_copy.c b/arch/mips/lib/csum_partial_copy.c
index 1720f2ceeea..06771040a26 100644
--- a/arch/mips/lib/csum_partial_copy.c
+++ b/arch/mips/lib/csum_partial_copy.c
@@ -7,6 +7,7 @@
  * Copyright (C) 1998, 1999 Ralf Baechle
  */
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/types.h>
 #include <asm/byteorder.h>
 #include <asm/string.h>
@@ -29,6 +30,8 @@ __wsum csum_partial_copy_nocheck(const void *src,
 	return sum;
 }
 
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
+
 /*
  * Copy from userspace and compute checksum.  If we catch an exception
  * then zero the rest of the buffer.
--
cgit v1.2.3
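The export exists because modular drivers resolve the symbol at load time. A hypothetical fragment of such a module (not taken from ibmtr.c or typhoon.c; the wrapper name is mine) showing the call that would fail to resolve without the EXPORT_SYMBOL added above:

    #include <linux/module.h>
    #include <net/checksum.h>

    /* Checksum a buffer while copying it, as a modular NIC driver might. */
    static __wsum copy_and_csum(const void *src, void *dst, int len)
    {
            return csum_partial_copy_nocheck(src, dst, len, 0);
    }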