linux-brain/arch/hexagon/lib/memset.S
Thomas Gleixner 08dbd0f8ef treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 267
Based on 1 normalized pattern(s):

  this program is free software you can redistribute it and or modify
  it under the terms of the gnu general public license version 2 and
  only version 2 as published by the free software foundation this
  program is distributed in the hope that it will be useful but
  without any warranty without even the implied warranty of
  merchantability or fitness for a particular purpose see the gnu
  general public license for more details you should have received a
  copy of the gnu general public license along with this program if
  not write to the free software foundation inc 51 franklin street
  fifth floor boston ma 02110 1301 usa

extracted by the scancode license scanner the SPDX license identifier

  GPL-2.0-only

has been chosen to replace the boilerplate/reference in 94 file(s).

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Allison Randal <allison@lohutok.net>
Reviewed-by: Richard Fontana <rfontana@redhat.com>
Reviewed-by: Alexios Zavras <alexios.zavras@intel.com>
Cc: linux-spdx@vger.kernel.org
Link: https://lkml.kernel.org/r/20190529141334.043630402@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-06-05 17:30:29 +02:00

303 lines
4.5 KiB
ArmAsm

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2011, The Linux Foundation. All rights reserved.
*/
/* HEXAGON assembly optimized memset */
/* Replaces the standard library function memset */
/*
 * HEXAGON_OPT_FUNC_BEGIN name
 *
 * Opens a function definition: switches to the .text section, aligns the
 * location counter to 2^4 = 16 bytes, makes the symbol global, marks it as
 * a function symbol and finally emits the entry label itself.
 * Pair with HEXAGON_OPT_FUNC_FINISH using the same name.
 */
.macro HEXAGON_OPT_FUNC_BEGIN name
.text
.p2align 4
.globl \name
.type \name, @function
\name:
.endm
/*
 * HEXAGON_OPT_FUNC_FINISH name
 *
 * Closes a function opened with HEXAGON_OPT_FUNC_BEGIN: records the symbol
 * size as the distance from the entry label to the current location.
 */
.macro HEXAGON_OPT_FUNC_FINISH name
.size \name, . - \name
.endm
/*
 * FUNCTION: memset (v2 version)
 *
 * Assembled only when __HEXAGON_ARCH__ < 3.
 *
 * Register roles, as used by the code below:
 *   r0    destination pointer on entry; never written here, so it still
 *         holds the original pointer at every "jumpr r31"
 *   r1    fill value; only read by vsplatb, which replicates its low byte
 *   r3:2  remaining byte count as a 64-bit pair (r2 = count, r3 = 0)
 *   r4    fill byte replicated into all four bytes of the word
 *   r5:4  8-byte fill pattern for the doubleword loop (r5 = r4)
 *   r7:6  size of the current store as a 64-bit pair (r7 = 0),
 *         subtracted from r3:2 after each store
 *   r8    running store pointer
 *   r9    8 - (r0 & 7); bits 0..2 select the head stores
 *   r10   iteration count of the doubleword loop
 *   p0/p1 branch predicates
 *
 * Scheduling note: a conditional jump written without ".new" tests the
 * predicate value from before its packet.  Most packets below therefore
 * compute the predicate for the NEXT packet while branching on the one
 * computed by the PREVIOUS packet.
 *
 * NOTE(review): label 2 is only reached with count > 7, so the
 * "if p1 jumpr r31" exits in the three head stages (count equal to 1, 2
 * or 4) look unreachable -- confirm before relying on or removing them.
 */
#if __HEXAGON_ARCH__ < 3
HEXAGON_OPT_FUNC_BEGIN memset
/* Classify the request: empty, short (fewer than 8 bytes) or long. */
{
r6 = #8
r7 = extractu(r0, #3 , #0) /* r7 = r0 & 7, offset within a doubleword */
p0 = cmp.eq(r2, #0) /* p0: count is zero */
p1 = cmp.gtu(r2, #7) /* p1: at least 8 bytes to set */
}
{
r4 = vsplatb(r1)
r8 = r0 /* leave r0 intact for return val */
r9 = sub(r6, r7) /* bytes until double alignment */
if p0 jumpr r31 /* count == 0, so return */
}
/*
 * r3 = r7 = 0 turns r3:2 and r7:6 into 64-bit count and step values.
 * p0 = bit 0 of r9, consumed by the branch at label 2.
 */
{
r3 = #0
r7 = #0
p0 = tstbit(r9, #0)
if p1 jump 2f /* skip byte loop */
}
/* less than 8 bytes to set, so just set a byte at a time and return */
loop0(1f, r2) /* byte loop */
.falign
1: /* byte loop */
{
memb(r8++#1) = r4
}:endloop0
jumpr r31
.falign
/*
 * Head stage 1: one byte if bit 0 of r9 is set (p0 from the packet above).
 * Also prepares p0 = bit 1 of r9 for head stage 2.
 */
2: /* skip byte loop */
{
r6 = #1
p0 = tstbit(r9, #1)
p1 = cmp.eq(r2, #1)
if !p0 jump 3f /* skip initial byte store */
}
{
memb(r8++#1) = r4
r3:2 = sub(r3:2, r7:6) /* remaining -= 1 */
if p1 jumpr r31
}
.falign
/*
 * Head stage 2: one halfword if bit 1 of r9 is set.
 * Also prepares p0 = bit 2 of r9 for head stage 3.
 */
3: /* skip initial byte store */
{
r6 = #2
p0 = tstbit(r9, #2)
p1 = cmp.eq(r2, #2)
if !p0 jump 4f /* skip initial half store */
}
{
memh(r8++#2) = r4
r3:2 = sub(r3:2, r7:6) /* remaining -= 2 */
if p1 jumpr r31
}
.falign
/*
 * Head stage 3: one word if bit 2 of r9 is set.
 * The p0 written here (remaining > 7) is the one tested at label 5 when
 * the word store is skipped.
 */
4: /* skip initial half store */
{
r6 = #4
p0 = cmp.gtu(r2, #7)
p1 = cmp.eq(r2, #4)
if !p0 jump 5f /* skip initial word store */
}
/*
 * The compare reads r2 from before the subtract in the same packet, so
 * "> 11" here means "more than 7 bytes remain after this word store".
 */
{
memw(r8++#4) = r4
r3:2 = sub(r3:2, r7:6) /* remaining -= 4 */
p0 = cmp.gtu(r2, #11)
if p1 jumpr r31
}
.falign
/*
 * r10 = number of whole doublewords left.  The jump is taken when p0
 * (remaining > 7, from whichever packet above ran last) is false.
 * r3 was zeroed at entry, so the compare against 1 is expected to leave
 * p1 false for the test at label 7.
 */
5: /* skip initial word store */
{
r10 = lsr(r2, #3)
p1 = cmp.eq(r3, #1)
if !p0 jump 7f /* skip double loop */
}
{
r5 = r4 /* r5:4 = 8 copies of the fill byte */
r6 = #8 /* step for the doubleword loop */
loop0(6f, r10) /* double loop */
}
/* set bytes a double word at a time */
.falign
/*
 * p1 compares r2 from before the subtract: it ends up true when the
 * final doubleword store consumed exactly the last 8 bytes.
 */
6: /* double loop */
{
memd(r8++#8) = r5:4
r3:2 = sub(r3:2, r7:6) /* remaining -= 8 */
p1 = cmp.eq(r2, #8)
}:endloop0
.falign
/*
 * Tail: word, halfword and byte stores selected by bits 2, 1 and 0 of
 * the remaining count.  Returns at once if p1 says nothing is left.
 */
7: /* skip double loop */
{
p0 = tstbit(r2, #2)
if p1 jumpr r31
}
/* Branches on bit 2 (p0 from above); prepares p0 = bit 1 for label 8. */
{
r6 = #4
p0 = tstbit(r2, #1)
p1 = cmp.eq(r2, #4) /* the word store would be the last store */
if !p0 jump 8f /* skip final word store */
}
{
memw(r8++#4) = r4
r3:2 = sub(r3:2, r7:6) /* remaining -= 4 */
if p1 jumpr r31
}
.falign
8: /* skip final word store */
{
p1 = cmp.eq(r2, #2) /* the halfword store would be the last store */
if !p0 jump 9f /* skip final half store */
}
{
memh(r8++#2) = r4
if p1 jumpr r31
}
.falign
/* Reached only with one byte still to set: store it and return. */
9: /* skip final half store */
{
memb(r8++#1) = r4
jumpr r31
}
HEXAGON_OPT_FUNC_FINISH memset
#endif
/*
 * FUNCTION: memset (v3 and higher version)
 *
 * Assembled only when __HEXAGON_ARCH__ >= 3.
 *
 * Register roles, as used by the code below:
 *   r0    destination pointer on entry; never written here, so it still
 *         holds the original pointer at every "jumpr r31"
 *   r1    fill value; byte stores write its low byte directly
 *   r2    remaining byte count
 *   r3    byte-loop pointer, scratch for the 32-byte alignment test,
 *         then the number of 32-byte blocks
 *   r6    running store pointer
 *   r7    fill byte replicated into all four bytes of the word
 *   r5:4  8-byte fill pattern (r7 in both halves)
 *   r8    iteration count of the doubleword loop
 *   p0/p1 branch predicates
 *
 * Flow: counts up to 8 use a plain byte loop.  Larger counts store a
 * byte, halfword and word as needed until r6 has bits 0..2 clear, then
 * (only when more than 127 bytes remain) step to a 32-byte boundary and
 * fill 32-byte blocks using dczeroa, then finish with a doubleword loop
 * and word/halfword/byte tail stores.
 *
 * Predicate note: "pN.new" tests the value produced in the same packet;
 * a plain "pN" tests the value from before the packet.
 *
 * NOTE(review): .L3 is only reached with count > 8, so the early exits
 * to .L1 in the head stages (count equal to 1, 2 or 4) look
 * unreachable -- confirm before relying on or removing them.
 */
#if __HEXAGON_ARCH__ >= 3
HEXAGON_OPT_FUNC_BEGIN memset
{
r7=vsplatb(r1)
r6 = r0
if (r2==#0) jump:nt .L1 /* count == 0: straight to the return */
}
{
r5:4=combine(r7,r7)
p0 = cmp.gtu(r2,#8)
if (p0.new) jump:nt .L3 /* more than 8 bytes: aligned path */
}
/* 1..8 bytes: set them one at a time and return. */
{
r3 = r0
loop0(.L47,r2)
}
.falign
.L47:
{
memb(r3++#1) = r1
}:endloop0 /* start=.L47 */
jumpr r31
/* Head stage 1: one byte if the destination address is odd. */
.L3:
{
p0 = tstbit(r0,#0)
if (!p0.new) jump:nt .L8
p1 = cmp.eq(r2, #1)
}
{
r6 = add(r0, #1)
r2 = add(r2,#-1)
memb(r0) = r1
if (p1) jump .L1 /* p1 comes from the previous packet */
}
/* Head stage 2: one halfword if bit 1 of the pointer is set. */
.L8:
{
p0 = tstbit(r6,#1)
if (!p0.new) jump:nt .L10
}
/* The compare reads r2 from before the decrement in the same packet. */
{
r2 = add(r2,#-2)
memh(r6++#2) = r7
p0 = cmp.eq(r2, #2)
if (p0.new) jump:nt .L1
}
/* Head stage 3: one word if bit 2 of the pointer is set. */
.L10:
{
p0 = tstbit(r6,#2)
if (!p0.new) jump:nt .L12
}
/* The compare reads r2 from before the decrement in the same packet. */
{
r2 = add(r2,#-4)
memw(r6++#4) = r7
p0 = cmp.eq(r2, #4)
if (p0.new) jump:nt .L1
}
/*
 * r6 now has bits 0..2 clear.  With 127 bytes or fewer remaining, go
 * directly to the doubleword loop at .L14.
 */
.L12:
{
p0 = cmp.gtu(r2,#127)
if (!p0.new) jump:nt .L14
}
/*
 * Step r6 up to a multiple of 32 with at most three doubleword stores,
 * leaving for .L17 as soon as (r6 & 31) == 0.
 */
r3 = and(r6,#31)
if (r3==#0) jump:nt .L17
{
memd(r6++#8) = r5:4
r2 = add(r2,#-8)
}
r3 = and(r6,#31)
if (r3==#0) jump:nt .L17
{
memd(r6++#8) = r5:4
r2 = add(r2,#-8)
}
r3 = and(r6,#31)
if (r3==#0) jump:nt .L17
{
memd(r6++#8) = r5:4
r2 = add(r2,#-8)
}
/*
 * r3 = number of whole 32-byte blocks left.  The test is on all of r1,
 * not only its low byte: any non-zero r1 takes the .L18 path, which
 * stores the r5:4 pattern after each dczeroa.
 */
.L17:
{
r3 = lsr(r2,#5)
if (r1!=#0) jump:nt .L18
}
/*
 * Zero fill.  loop0 reads r3 as it was before this packet (the block
 * count), not the r6 copy written alongside it.
 * NOTE(review): r8 and the new r3 do not appear to be read before r8 is
 * rewritten at .L14 -- confirm whether these two moves are needed.
 */
{
r8 = r3
r3 = r6
loop0(.L46,r3)
}
.falign
/* One dczeroa per iteration; pointer and count move by 32 each time. */
.L46:
{
dczeroa(r6)
r6 = add(r6,#32)
r2 = add(r2,#-32)
}:endloop0 /* start=.L46 */
/* Doubleword loop for what is left: r8 = r2 / 8 stores of r5:4. */
.L14:
{
p0 = cmp.gtu(r2,#7)
if (!p0.new) jump:nt .L28 /* fewer than 8 bytes left: tail only */
r8 = lsr(r2,#3)
}
loop0(.L44,r8)
.falign
.L44:
{
memd(r6++#8) = r5:4
r2 = add(r2,#-8)
}:endloop0 /* start=.L44 */
/* Tail: bit 2 of the remaining count selects a word store. */
.L28:
{
p0 = tstbit(r2,#2)
if (!p0.new) jump:nt .L33
}
{
r2 = add(r2,#-4)
memw(r6++#4) = r7
}
/* Tail: bit 1 of the remaining count selects a halfword store. */
.L33:
{
p0 = tstbit(r2,#1)
if (!p0.new) jump:nt .L35
}
{
r2 = add(r2,#-2)
memh(r6++#2) = r7
}
/* Tail: a single byte is stored only if exactly one byte remains. */
.L35:
p0 = cmp.eq(r2,#1)
if (p0) memb(r6) = r1
/* Common return; r0 has not been modified. */
.L1:
jumpr r31
/*
 * Non-zero fill of 32-byte blocks: r3 iterations, each doing a dczeroa
 * on the block followed by four doubleword stores of r5:4, then back to
 * .L14 for the remainder.
 * NOTE(review): the dczeroa presumably avoids fetching a line that is
 * about to be fully overwritten -- confirm against the ISA manual.
 */
.L18:
loop0(.L45,r3)
.falign
.L45:
dczeroa(r6)
{
memd(r6++#8) = r5:4
r2 = add(r2,#-32) /* whole-block accounting, done once per iteration */
}
memd(r6++#8) = r5:4
memd(r6++#8) = r5:4
{
memd(r6++#8) = r5:4
}:endloop0 /* start=.L45 */
jump .L14
HEXAGON_OPT_FUNC_FINISH memset
#endif