parisc: Add assembly implementations for memset, strlen, strcpy, strncpy and strcat

Add performance-optimized versions of some string functions.

Signed-off-by: Helge Deller <deller@gmx.de>
Tested-by: Sven Schnelle <svens@stackframe.org>
This commit is contained in:
Helge Deller 2019-02-06 23:21:10 +01:00
parent ec4d396b63
commit 83af58f806
5 changed files with 157 additions and 93 deletions

View File

@ -8,4 +8,19 @@ extern void * memset(void *, int, size_t);
#define __HAVE_ARCH_MEMCPY
void * memcpy(void * dest,const void *src,size_t count);
/*
 * String routines that have PA-RISC assembly implementations in
 * arch/parisc/lib/string.S.  Each prototype is paired with its
 * __HAVE_ARCH_<NAME> marker macro.
 */
#define __HAVE_ARCH_STRLEN
extern size_t strlen(const char *s);
#define __HAVE_ARCH_STRCPY
extern char *strcpy(char *dest, const char *src);
#define __HAVE_ARCH_STRNCPY
extern char *strncpy(char *dest, const char *src, size_t count);
#define __HAVE_ARCH_STRCAT
extern char *strcat(char *dest, const char *src);
#define __HAVE_ARCH_MEMSET
extern void *memset(void *, int, size_t);
#endif

View File

@ -17,6 +17,10 @@
#include <linux/string.h>
/* memset and the string helpers below are implemented in arch/parisc/lib/string.S. */
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(strlen);
EXPORT_SYMBOL(strcpy);
EXPORT_SYMBOL(strncpy);
EXPORT_SYMBOL(strcat);
#include <linux/atomic.h>
EXPORT_SYMBOL(__xchg8);

View File

@ -3,7 +3,7 @@
# Makefile for parisc-specific library files
#
lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \
ucmpdi2.o delay.o
lib-y := lusercopy.o bitops.o checksum.o io.o memcpy.o \
ucmpdi2.o delay.o string.o
obj-y := iomap.o

View File

@ -1,91 +0,0 @@
/* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
/* Slight modifications for pa-risc linux - Paul Bame <bame@debian.org> */
#include <linux/types.h>
#include <asm/string.h>
#define OPSIZ (BITS_PER_LONG/8)
typedef unsigned long op_t;
/*
 * memset - fill LEN bytes starting at DSTPP with the byte value of SC.
 *
 * Regions of 8 bytes or more are written in stages: single bytes until the
 * address is a multiple of OPSIZ, then groups of eight op_t words, then
 * single op_t words.  Whatever is left over (and any region shorter than
 * 8 bytes) is written one byte at a time.
 *
 * Returns DSTPP.
 */
void *
memset (void *dstpp, int sc, size_t len)
{
	unsigned int fill = sc;
	long int cursor = (long int) dstpp;

	if (len >= 8) {
		op_t pattern = (unsigned char) fill;
		size_t remaining;
		int slot;

		/* Replicate the fill byte into every byte of one op_t. */
		pattern |= pattern << 8;
		pattern |= pattern << 16;
		if (OPSIZ > 4)
			/* Shift in two steps so a 32-bit long does not trigger a warning. */
			pattern |= (pattern << 16) << 16;

		/*
		 * Byte stores up to the next OPSIZ boundary.  len is at least 8
		 * here, so this loop does not need to test it.
		 */
		for (; cursor % OPSIZ != 0; cursor += 1, len -= 1)
			*(unsigned char *) cursor = fill;

		/* Eight op_t stores per pass while at least eight words remain. */
		for (remaining = len / (OPSIZ * 8); remaining > 0; remaining -= 1) {
			op_t *block = (op_t *) cursor;

			for (slot = 0; slot < 8; slot++)
				block[slot] = pattern;
			cursor += 8 * OPSIZ;
		}
		len %= OPSIZ * 8;

		/* One op_t store per pass until fewer than OPSIZ bytes remain. */
		for (remaining = len / OPSIZ; remaining > 0; remaining -= 1) {
			*(op_t *) cursor = pattern;
			cursor += OPSIZ;
		}
		len %= OPSIZ;
	}

	/* Trailing bytes, or the whole region when it was shorter than 8 bytes. */
	for (; len > 0; len -= 1) {
		*(unsigned char *) cursor = fill;
		cursor += 1;
	}

	return dstpp;
}

136
arch/parisc/lib/string.S Normal file
View File

@ -0,0 +1,136 @@
// SPDX-License-Identifier: GPL-2.0
/*
* PA-RISC assembly string functions
*
* Copyright (C) 2019 Helge Deller <deller@gmx.de>
*/
#include <asm/assembly.h>
#include <linux/linkage.h>
.section .text.hot
.level PA_ASM_LEVEL
t0 = r20
t1 = r21
t2 = r22
/*
 * strlen - length of the NUL-terminated string at arg0.
 *
 * In:  arg0 = s
 * Out: ret0 = number of bytes before the first NUL byte; 0 when s is NULL.
 * Modifies t0 and t1, and %sar on the unaligned path.
 *
 * The string is scanned one 32-bit word at a time, starting at the
 * word-aligned address at or below s.  Whole words are loaded, so bytes
 * that share a word with the start or the end of the string are read too.
 * uxor,nbz nullifies the instruction executed after it when the word
 * contains no zero byte; that is what keeps the scan loop running.
 */
ENTRY_CFI(strlen, frame=0,no_calls)
/* ret0 = s; a NULL pointer falls into the branch below and returns 0. */
or,COND(<>) arg0,r0,ret0
b,l,n .Lstrlen_null_ptr,r0
/* Clear the two low address bits: ret0 = s rounded down to a word boundary. */
depwi 0,31,2,ret0
cmpb,COND(<>) arg0,ret0,.Lstrlen_not_aligned
/* Delay slot, executed on both paths: t0 = first word, ret0 += 4. */
ldw,ma 4(ret0),t0
/* Aligned start: always branch to the loop; the delay slot tests t0 for a zero byte. */
cmpib,tr 0,r0,.Lstrlen_loop
uxor,nbz r0,t0,r0
.Lstrlen_not_aligned:
/* t1 = s - aligned - 1 (0..2), scaled to a bit position and moved to %sar. */
uaddcm arg0,ret0,t1
shladd t1,3,r0,t1
mtsar t1
/*
 * Set the leading bits of t0 up to bit position %sar, which is intended to
 * make every byte that precedes s inside this word non-zero so it cannot
 * be taken for the terminator.
 */
depwi -1,%sar,32,t0
uxor,nbz r0,t0,r0
.Lstrlen_loop:
/* Reached un-nullified only when the word just tested holds a zero byte. */
b,l,n .Lstrlen_end_loop,r0
ldw,ma 4(ret0),t0
cmpib,tr 0,r0,.Lstrlen_loop
uxor,nbz r0,t0,r0
.Lstrlen_end_loop:
/*
 * ret0 points just past the word containing the terminator.  Test the
 * bytes from lowest address upwards and step ret0 back by 3, 2, 1 or 0
 * so that it ends up one byte past the NUL.
 */
extrw,u,<> t0,7,8,r0
addib,tr,n -3,ret0,.Lstrlen_out
extrw,u,<> t0,15,8,r0
addib,tr,n -2,ret0,.Lstrlen_out
extrw,u,<> t0,23,8,r0
addi -1,ret0,ret0
.Lstrlen_out:
bv r0(rp)
/* Delay slot: ret0 = ret0 - s - 1, the string length. */
uaddcm ret0,arg0,ret0
.Lstrlen_null_ptr:
bv,n r0(rp)
ENDPROC_CFI(strlen)
/*
 * strcpy - copy the NUL-terminated string at arg1 to arg0, one byte at a time.
 *
 * In:  arg0 = dest, arg1 = src
 * Out: ret0 = dest
 * Modifies t0, t1, t2 and arg1 (reused as the byte temporary in the loop).
 *
 * The terminating NUL is copied as well.  No length limit is applied.
 */
ENTRY_CFI(strcpy, frame=0,no_calls)
/* Copy the first byte unconditionally. */
ldb 0(arg1),t0
stb t0,0(arg0)
/* ret0 = dest, t1 = src + 1. */
ldo 0(arg0),ret0
ldo 1(arg1),t1
/* Done if the first byte was the terminator; delay slot sets t2 = dest + 1. */
cmpb,= r0,t0,2f
ldo 1(arg0),t2
/* Loop: copy *t1 to *t2 and advance both until a NUL byte has been stored. */
1: ldb 0(t1),arg1
stb arg1,0(t2)
ldo 1(t1),t1
cmpb,<> r0,arg1,1b
/* Delay slot: advance the destination cursor. */
ldo 1(t2),t2
2: bv,n r0(rp)
ENDPROC_CFI(strcpy)
/*
 * strncpy - copy at most arg2 bytes of the string at arg1 to arg0.
 *
 * In:  arg0 = dest, arg1 = src, arg2 = count
 * Out: ret0 = dest
 * Modifies arg0, arg1, arg2 and t0.
 *
 * Exactly count bytes are written to dest:
 *  - count == 0 writes nothing and does not read src;
 *  - if src is shorter than count, the remainder of dest is filled with
 *    NUL bytes (src is not advanced past its terminator, so the NUL is
 *    simply stored again on every remaining iteration);
 *  - if src is count bytes or longer, dest is NOT NUL-terminated.
 */
ENTRY_CFI(strncpy, frame=0,no_calls)
	/* Nothing to do for count == 0; the delay slot sets the return value. */
	cmpb,COND(=) r0,arg2,3f
	copy arg0,ret0
1:	ldb 0(arg1),t0
	/* At the terminator: skip the src increment.  The store in the
	 * delay slot is executed whether or not the branch is taken. */
	cmpb,= r0,t0,2f
	stb t0,0(arg0)
	ldo 1(arg1),arg1
2:	ldo -1(arg2),arg2
	/* Loop while bytes remain; the delay slot advances dest. */
	cmpb,COND(<>) r0,arg2,1b
	ldo 1(arg0),arg0
3:	bv,n r0(rp)
ENDPROC_CFI(strncpy)
/*
 * strcat - append the NUL-terminated string at arg1 to the string at arg0.
 *
 * In:  arg0 = dest, arg1 = src
 * Out: ret0 = dest
 * Modifies arg0, arg1, t0, t1 and t2.
 *
 * First walks dest to its terminating NUL, then copies src byte by byte,
 * including the terminator.  No length limit is applied.
 */
ENTRY_CFI(strcat, frame=0,no_calls)
/* Empty dest: start appending right at dest.  Delay slot sets ret0 = dest. */
ldb 0(arg0),t0
cmpb,= t0,r0,2f
ldo 0(arg0),ret0
ldo 1(arg0),arg0
/*
 * Scan for the end of dest.  The ,n completer on this backward branch
 * nullifies the increment when the branch is not taken, so arg0 is left
 * pointing at the NUL byte rather than one past it.
 */
1: ldb 0(arg0),t1
cmpb,<>,n r0,t1,1b
ldo 1(arg0),arg0
/* Copy loop: store *src, then reload the same byte into t0 for the end test. */
2: ldb 0(arg1),t2
stb t2,0(arg0)
ldo 1(arg0),arg0
ldb 0(arg1),t0
cmpb,<> r0,t0,2b
/* Delay slot: advance src. */
ldo 1(arg1),arg1
bv,n r0(rp)
ENDPROC_CFI(strcat)
/*
 * memset - store the low byte of arg1 into arg2 consecutive bytes at arg0.
 *
 * In:  arg0 = destination, arg1 = fill value, arg2 = byte count
 * Out: ret0 = destination
 * Modifies arg0, arg2, arg3, t0, t1 and t2.
 *
 * Returns without storing anything when the destination is NULL or the
 * count is zero.  Every store is a single-byte stb: a first loop writes
 * ((count - 1) & 3) + 1 bytes, then a second loop writes the remaining
 * multiple of four bytes, four per iteration.
 */
ENTRY_CFI(memset, frame=0,no_calls)
copy arg0,ret0
/* NULL destination: return.  Delay slot sets t2 = write cursor. */
cmpb,COND(=) r0,arg0,4f
copy arg0,t2
/* Zero count: return.  Delay slot sets arg3 = count - 1. */
cmpb,COND(=) r0,arg2,4f
ldo -1(arg2),arg3
/* t0 = -count, t1 = count. */
subi -1,arg3,t0
subi 0,t0,t1
/*
 * arg2 = count - 1 when count is positive as a signed value, otherwise 0.
 * NOTE(review): a count with the sign bit set therefore appears to store
 * only a single byte -- confirm no caller relies on such sizes.
 */
cmpiclr,COND(>=) 0,t1,arg2
ldo -1(t1),arg2
/* arg0 = (count - 1) & 3: the head loop below runs arg0 + 1 times. */
extru arg2,31,2,arg0
2: stb arg1,0(t2)
ldo 1(t2),t2
addib,>= -1,arg0,2b
/* Delay slot: arg3 is decremented here but not read again in this routine. */
ldo -1(arg3),arg3
/* Fewer than four bytes left after the head loop: done. */
cmpiclr,COND(<=) 4,arg2,r0
b,l,n 4f,r0
/* Round arg2 down to a multiple of four: bytes left for the unrolled loop. */
#ifdef CONFIG_64BIT
depd,* r0,63,2,arg2
#else
depw r0,31,2,arg2
#endif
/* Bias the cursor by one so the four stores use offsets -1..2. */
ldo 1(t2),t2
3: stb arg1,-1(t2)
stb arg1,0(t2)
stb arg1,1(t2)
stb arg1,2(t2)
addib,COND(>) -4,arg2,3b
/* Delay slot: advance the cursor by four bytes. */
ldo 4(t2),t2
4: bv,n r0(rp)
ENDPROC_CFI(memset)
.end