123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315 |
- .macro HEXAGON_OPT_FUNC_BEGIN name /* opens the definition of a global function whose symbol is the macro argument */
- .text /* switch to the code section */
- .p2align 4 /* align the entry point to 2^4 = 16 bytes */
- .globl \name /* give the symbol global binding */
- .type \name, @function /* mark the symbol as a function */
- \name: /* entry label, the function body follows the macro invocation */
- .endm
- .macro HEXAGON_OPT_FUNC_FINISH name /* closes a function opened with HEXAGON_OPT_FUNC_BEGIN */
- .size \name, . - \name /* symbol size = current location minus the entry label */
- .endm
- #if __HEXAGON_ARCH__ < 3 /* this variant is assembled only for architecture versions below 3 */
- HEXAGON_OPT_FUNC_BEGIN memset /* memset: r0 = destination, r1 = fill value, r2 = byte count. r0 is never written here, so the caller gets it back unchanged */
- { /* no dot-new forms are used in this variant: every conditional reads the predicate value produced by an EARLIER packet */
- r6 = #8
- r7 = extractu(r0, #3 , #0) /* r7 = low three bits of the destination address */
- p0 = cmp.eq(r2, #0) /* p0 = count is zero */
- p1 = cmp.gtu(r2, #7) /* p1 = count is 8 or more */
- }
- {
- r4 = vsplatb(r1) /* r4 = low byte of r1 replicated into all four bytes */
- r8 = r0 /* r8 = running store pointer */
- r9 = sub(r6, r7) /* r9 = 8 minus the misalignment, bits 0..2 choose the byte, halfword and word head stores */
- if p0 jumpr r31 /* zero count: return immediately */
- }
- {
- r3 = #0 /* r3 is the high word of the r3:2 pair, so r3:2 holds the count as a 64-bit value */
- r7 = #0 /* r7 is the high word of the r7:6 pair that holds the step subtracted from the count */
- p0 = tstbit(r9, #0) /* p0 = a single head byte is needed (consumed at label 2) */
- if p1 jump 2f /* counts of 8 or more take the align-then-doubleword path */
- }
- loop0(1f, r2) /* short path, count is 1..7: hardware loop storing one byte per iteration */
- .falign
- 1:
- {
- memb(r8++#1) = r4
- }:endloop0
- jumpr r31
- .falign
- 2: /* head alignment, step 1: optional byte store */
- {
- r6 = #1 /* step for the count update below */
- p0 = tstbit(r9, #1) /* prepares the halfword test consumed at label 3 */
- p1 = cmp.eq(r2, #1) /* count is at least 8 on this path, so this early exit is not expected to fire */
- if !p0 jump 3f /* reads p0 from the previous packet (bit 0 of r9) */
- }
- {
- memb(r8++#1) = r4
- r3:2 = sub(r3:2, r7:6) /* count -= 1 */
- if p1 jumpr r31
- }
- .falign
- 3: /* head alignment, step 2: optional halfword store */
- {
- r6 = #2
- p0 = tstbit(r9, #2) /* prepares the word test consumed at label 4 */
- p1 = cmp.eq(r2, #2)
- if !p0 jump 4f /* reads the bit 1 test made in the previous packet on this path */
- }
- {
- memh(r8++#2) = r4
- r3:2 = sub(r3:2, r7:6) /* count -= 2 */
- if p1 jumpr r31
- }
- .falign
- 4: /* head alignment, step 3: optional word store */
- {
- r6 = #4
- p0 = cmp.gtu(r2, #7) /* p0 = at least 8 bytes remain, used at label 5 when the word store is skipped */
- p1 = cmp.eq(r2, #4)
- if !p0 jump 5f /* reads the bit 2 test made in the previous packet on this path */
- }
- {
- memw(r8++#4) = r4
- r3:2 = sub(r3:2, r7:6) /* count -= 4 */
- p0 = cmp.gtu(r2, #11) /* reads the count from before the subtraction in this packet, so p0 = at least 8 bytes remain after this store */
- if p1 jumpr r31
- }
- .falign
- 5: /* r8 is now 8-byte aligned */
- {
- r10 = lsr(r2, #3) /* r10 = number of whole doublewords remaining */
- p1 = cmp.eq(r3, #1) /* NOTE(review): r3 was set to 0 and only changes as the high half of the count subtraction, so this looks like a way to clear p1 before label 7 - confirm intent */
- if !p0 jump 7f /* fewer than 8 bytes remain: skip the doubleword loop */
- }
- {
- r5 = r4 /* r5:4 = eight copies of the fill byte */
- r6 = #8 /* step for the loop below */
- loop0(6f, r10)
- }
- .falign
- 6:
- {
- memd(r8++#8) = r5:4
- r3:2 = sub(r3:2, r7:6) /* count -= 8 */
- p1 = cmp.eq(r2, #8) /* compares the count from before this iteration, so after the last iteration p1 = nothing remains */
- }:endloop0
- .falign
- 7: /* tail: 0..7 bytes remain */
- {
- p0 = tstbit(r2, #2) /* p0 = a tail word is needed */
- if p1 jumpr r31 /* done when the doubleword loop consumed everything */
- }
- {
- r6 = #4
- p0 = tstbit(r2, #1) /* p0 = a tail halfword is needed (consumed at label 8) */
- p1 = cmp.eq(r2, #4) /* exactly one word remains */
- if !p0 jump 8f /* reads the bit 2 test from the previous packet */
- }
- {
- memw(r8++#4) = r4
- r3:2 = sub(r3:2, r7:6) /* count -= 4 */
- if p1 jumpr r31
- }
- .falign
- 8:
- {
- p1 = cmp.eq(r2, #2) /* exactly one halfword remains */
- if !p0 jump 9f /* reads the bit 1 test made before the optional word store */
- }
- {
- memh(r8++#2) = r4
- if p1 jumpr r31 /* the count is not updated here, falling through means one odd byte is still to be written */
- }
- .falign
- 9:
- {
- memb(r8++#1) = r4 /* final single byte */
- jumpr r31
- }
- HEXAGON_OPT_FUNC_FINISH memset
- #endif
- #if __HEXAGON_ARCH__ >= 3 /* this variant is assembled for architecture version 3 and later */
- HEXAGON_OPT_FUNC_BEGIN memset /* memset: r0 = destination, r1 = fill value, r2 = byte count. r0 is never written here, so the caller gets it back unchanged */
- {
- r7=vsplatb(r1) /* r7 = low byte of r1 replicated into all four bytes */
- r6 = r0 /* r6 = running store pointer */
- if (r2==#0) jump:nt .L1 /* zero count: go straight to the return */
- }
- {
- r5:4=combine(r7,r7) /* r5:4 = eight copies of the fill byte */
- p0 = cmp.gtu(r2,#8)
- if (p0.new) jump:nt .L3 /* dot-new: uses the p0 computed in this same packet. Counts above 8 take the aligned path */
- }
- {
- r3 = r0
- loop0(.L47,r2) /* short path, count is 1..8: one byte per iteration */
- }
- .falign
- .L47:
- {
- memb(r3++#1) = r1 /* stores the low byte of r1 */
- }:endloop0
- jumpr r31
- .L3: /* head alignment, step 1: optional byte when bit 0 of the address is set */
- {
- p0 = tstbit(r0,#0)
- if (!p0.new) jump:nt .L8
- p1 = cmp.eq(r2, #1) /* count exceeds 8 on this path, so the exit below is not expected to fire */
- }
- {
- r6 = add(r0, #1)
- r2 = add(r2,#-1)
- memb(r0) = r1
- if (p1) jump .L1
- }
- .L8: /* head alignment, step 2: optional halfword when bit 1 of the pointer is set */
- {
- p0 = tstbit(r6,#1)
- if (!p0.new) jump:nt .L10
- }
- {
- r2 = add(r2,#-2)
- memh(r6++#2) = r7
- p0 = cmp.eq(r2, #2) /* reads the count from before the decrement in this packet: true when these were the last two bytes */
- if (p0.new) jump:nt .L1
- }
- .L10: /* head alignment, step 3: optional word when bit 2 of the pointer is set */
- {
- p0 = tstbit(r6,#2)
- if (!p0.new) jump:nt .L12
- }
- {
- r2 = add(r2,#-4)
- memw(r6++#4) = r7
- p0 = cmp.eq(r2, #4) /* reads the count from before the decrement: true when these were the last four bytes */
- if (p0.new) jump:nt .L1
- }
- .L12: /* r6 is now 8-byte aligned */
- {
- p0 = cmp.gtu(r2,#127)
- if (!p0.new) jump:nt .L14 /* 127 bytes or fewer remain: use the plain doubleword loop */
- }
- r3 = and(r6,#31) /* low five pointer bits = offset inside a 32-byte block */
- if (r3==#0) jump:nt .L17
- { /* up to three doubleword stores bring the 8-byte aligned pointer to a 32-byte boundary */
- memd(r6++#8) = r5:4
- r2 = add(r2,#-8)
- }
- r3 = and(r6,#31)
- if (r3==#0) jump:nt .L17
- {
- memd(r6++#8) = r5:4
- r2 = add(r2,#-8)
- }
- r3 = and(r6,#31)
- if (r3==#0) jump:nt .L17
- {
- memd(r6++#8) = r5:4
- r2 = add(r2,#-8)
- }
- .L17: /* r6 is 32-byte aligned here */
- {
- r3 = lsr(r2,#5) /* r3 = number of whole 32-byte blocks remaining */
- if (r1!=#0) jump:nt .L18 /* tests the whole r1 register, not only its low byte. A nonzero register takes the store-based block loop */
- }
- { /* all three instructions read the register values from before this packet, so the loop count is the block count computed above */
- r8 = r3
- r3 = r6
- loop0(.L46,r3)
- }
- .falign
- .L46: /* zero-fill block loop: one 32-byte block per iteration */
- {
- dczeroa(r6) /* per the Hexagon ISA this zeroes the 32-byte block at r6 - TODO confirm against the reference manual */
- r6 = add(r6,#32)
- r2 = add(r2,#-32)
- }:endloop0
- .L14: /* doubleword loop for whatever remains in units of 8 bytes */
- {
- p0 = cmp.gtu(r2,#7)
- if (!p0.new) jump:nt .L28 /* fewer than 8 bytes remain: go to the tail */
- r8 = lsr(r2,#3) /* r8 = number of whole doublewords remaining */
- }
- loop0(.L44,r8)
- .falign
- .L44:
- {
- memd(r6++#8) = r5:4
- r2 = add(r2,#-8)
- }:endloop0
- .L28: /* tail: optional word when bit 2 of the remaining count is set */
- {
- p0 = tstbit(r2,#2)
- if (!p0.new) jump:nt .L33
- }
- {
- r2 = add(r2,#-4)
- memw(r6++#4) = r7
- }
- .L33: /* tail: optional halfword when bit 1 of the remaining count is set */
- {
- p0 = tstbit(r2,#1)
- if (!p0.new) jump:nt .L35
- }
- {
- r2 = add(r2,#-2)
- memh(r6++#2) = r7
- }
- .L35: /* tail: optional final byte */
- p0 = cmp.eq(r2,#1)
- if (p0) memb(r6) = r1
- .L1: /* common return point */
- jumpr r31
- .L18: /* nonzero-fill block loop: r3 iterations, 32 bytes per iteration */
- loop0(.L45,r3)
- .falign
- .L45:
- dczeroa(r6) /* NOTE(review): presumably issued so the block is set up in cache before the four stores overwrite it - confirm against the reference manual */
- {
- memd(r6++#8) = r5:4
- r2 = add(r2,#-32) /* the count drops by the full block size once per iteration */
- }
- memd(r6++#8) = r5:4
- memd(r6++#8) = r5:4
- {
- memd(r6++#8) = r5:4
- }:endloop0
- jump .L14 /* finish any remaining doublewords and the tail */
- HEXAGON_OPT_FUNC_FINISH memset
- #endif
|