123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429 |
- /*
- * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
- *
- * Author: Nicolas Pitre <nico@fluxnic.net>
- * - contributed to gcc-3.4 on Sep 30, 2003
- * - adapted for the Linux kernel on Oct 2, 2003
- */
- /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
- * SPDX-License-Identifier: GPL-2.0+
- */
- #include <linux/linkage.h>
- #include <asm/assembler.h>
/*
 * U-Boot compatibility: U-Boot does not support stack unwinding, so
 * UNWIND() is defined as an empty macro.  CONFIG_AEABI is defined as
 * well, so that all of the functions in this file are built without
 * diverging from the Linux code.
 */
#if defined(__UBOOT__)
#define UNWIND(x...)
#define CONFIG_AEABI
#endif
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core.
 *
 *   dividend  in:  numerator; clobbered
 *   divisor   in:  denominator; clobbered (shifted up, then back down)
 *   result    out: quotient
 *   curbit    scratch: quotient bit that matches the current divisor shift
 *
 * The users in this file reach this macro only after dealing with a
 * zero divisor, a divisor of 1, dividend <= divisor and power-of-two
 * divisors on their own, and they pass four distinct registers.
 * Local labels 1, 2 and 3 are used and the condition flags are changed.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Line the divisor up with the dividend in one step:
	@ shift = clz(divisor) - clz(dividend), curbit = 1 << shift.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, #1
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Start by shifting the divisor left 3 bits when that is possible
	@ and set curbit to match.  This places curbit at the left end of
	@ a 4 bit nibble in the division loop, which saves one pass in
	@ most cases.
	tst	\divisor, #0xe0000000
	movne	\curbit, #1
	moveq	\curbit, #8
	moveq	\divisor, \divisor, lsl #3

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unrolling in the main
	@ division loop.  Keep shifting until the divisor is larger
	@ than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\curbit, \curbit, lsl #4
	movlo	\divisor, \divisor, lsl #4
	blo	1b

	@ Very big divisors must be shifted one bit at a time, or they
	@ would be in danger of overflowing.
2:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\curbit, \curbit, lsl #1
	movlo	\divisor, \divisor, lsl #1
	blo	2b

	mov	\result, #0

#endif

	@ Division loop: up to four quotient bits are produced per pass.
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result, \result, \curbit
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result, \result, \curbit, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result, \result, \curbit, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result, \result, \curbit, lsr #3
	cmp	\dividend, #0			@ Early termination?
	movsne	\curbit, \curbit, lsr #4	@ No, any more bits to do?
	movne	\divisor, \divisor, lsr #4
	bne	3b

.endm
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Sets order to log2(divisor) for a power-of-two divisor.  Both users
 * in this file invoke it only after checking that the divisor is a
 * power of two, and then use order as the shift count that replaces
 * the division.
 *
 * The ARMv5+ variant leaves divisor untouched.  The fallback variant
 * shifts divisor down while it searches and changes the condition flags.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - clz(divisor)

#else

	@ Binary search: test the upper half of what is left, and when
	@ the set bit is up there, drop the lower half and count it.
	cmp	\divisor, #(1 << 16)
	movlo	\order, #0
	movhs	\order, #16
	movhs	\divisor, \divisor, lsr #16

	cmp	\divisor, #(1 << 8)
	addhs	\order, \order, #8
	movhs	\divisor, \divisor, lsr #8

	cmp	\divisor, #(1 << 4)
	addhs	\order, \order, #4
	movhs	\divisor, \divisor, lsr #4

	@ For a power of two the value left is 1, 2, 4 or 8: 8 adds 3,
	@ while 1, 2 and 4 add 0, 1 and 2, i.e. the value shifted right once.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned remainder core.
 *
 *   dividend  in:  numerator; out: remainder
 *   divisor   in:  denominator; clobbered (shifted up, then back down)
 *   order     scratch: number of bit positions the divisor was shifted up
 *   spare     scratch (only written by the ARMv5+ variant)
 *
 * The users in this file reach this macro only with dividend > divisor
 * and a divisor that is neither 0, 1 nor a power of two.
 * Local labels 1 to 5 are used and the condition flags are changed.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ order = clz(divisor) - clz(dividend); align the divisor with it.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unrolling in the main
	@ loop below.  Keep shifting until the divisor is larger than
	@ the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	addlo	\order, \order, #4
	movlo	\divisor, \divisor, lsl #4
	blo	1b

	@ Very big divisors must be shifted one bit at a time, or they
	@ would be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	addlo	\order, \order, #1
	movlo	\divisor, \divisor, lsl #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ There are order + 1 compare/subtract steps to do; run them in
	@ batches of 4 first, which needs order >= 3.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	mov	\divisor, \divisor, lsr #4
	@ Go round again only while the remainder compares >= 1 and a
	@ full batch of four steps is still left.
	cmp	\dividend, #1
	subsge	\order, \order, #4
	bge	1b

	@ Finished when no partial batch is left or the remainder is 0.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
2:	cmn	\order, #2
	blt	4f				@ one step left
	beq	3f				@ two steps left
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm
/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Uses: r1, r2, r3 and the condition flags
 *
 * A zero divisor branches to Ldiv0.
 */
.pushsection .text.__udivsi3, "ax"
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1		@ r2 = divisor - 1
	@ divisor == 1: r0 already holds the quotient
	reteq	lr
	bcc	Ldiv0			@ borrow means divisor == 0
	cmp	r0, r1
	bls	21f			@ dividend <= divisor
	tst	r1, r2
	beq	22f			@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	ret	lr

	@ dividend <= divisor: the quotient is 1 when equal, 0 otherwise
21:	movne	r0, #0
	moveq	r0, #1
	ret	lr

	@ power-of-two divisor: the quotient is a plain right shift
22:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	ret	lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)
.popsection
/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = remainder
 * Uses: r1, r2, r3 and the condition flags
 *
 * A zero divisor branches to Ldiv0.  The instructions up to the
 * conditional return form one chain of condition flags and must stay
 * in this order.
 */
.pushsection .text.__umodsi3, "ax"
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1		@ r2 = divisor - 1, Z if divisor is 1
	bcc	Ldiv0			@ borrow means divisor == 0
	cmpne	r0, r1			@ divisor != 1: dividend vs divisor
	moveq	r0, #0			@ divisor 1 or equal operands: 0
	tsthi	r1, r2			@ dividend > divisor: power of 2 ?
	andeq	r0, r0, r2		@ yes: keep dividend & (divisor - 1)
	@ Done when dividend <= divisor or the divisor is a power of 2.
	retls	lr

	ARM_MOD_BODY r0, r1, r2, r3

	ret	lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
.popsection
/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Uses: r1, r2, r3, ip and the condition flags
 *
 * ip holds dividend ^ divisor, whose top bit is the sign of the result.
 * The magnitudes are divided as unsigned values and the sign is put
 * back at the end.  A zero divisor branches to Ldiv0.
 */
.pushsection .text.__divsi3, "ax"
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	eor	ip, r0, r1		@ save the sign of the result
	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0		@ r1 = |divisor|, loops use unsigned
	subs	r2, r1, #1		@ division by 1 or -1 ?
	beq	20f
	movs	r3, r0
	rsbmi	r3, r0, #0		@ r3 = |dividend|
	cmp	r3, r1
	bls	21f			@ |dividend| <= |divisor|
	tst	r1, r2			@ divisor is power of 2 ?
	beq	22f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0		@ negative result: negate quotient
	ret	lr

	@ Divisor was 1 or -1.  ip ^ r0 is the original divisor, so N is
	@ set exactly when the dividend has to be negated.
20:	teq	ip, r0
	rsbmi	r0, r0, #0
	ret	lr

	@ |dividend| <= |divisor|: equal magnitudes give +1 or -1 (sign
	@ taken from ip), a smaller dividend gives 0.
21:	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	movlo	r0, #0
	ret	lr

	@ Power-of-two divisor: shift the magnitude, then apply the sign.
22:	ARM_DIV2_ORDER r1, r2

	mov	r0, r3, lsr r2
	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)
.popsection
/*
 * __modsi3: signed 32-bit remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = remainder, carrying the sign of the dividend
 * Uses: r1, r2, r3, ip and the condition flags
 *
 * The remainder is computed on the magnitudes and negated afterwards
 * when the dividend (kept in ip) was negative.  A zero divisor
 * branches to Ldiv0.  The run from the subs to the bls forms one chain
 * of condition flags and must stay in this order.
 */
.pushsection .text.__modsi3, "ax"
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0		@ r1 = |divisor|, loops use unsigned
	movs	ip, r0			@ preserve sign of dividend
	rsbmi	r0, r0, #0		@ r0 = |dividend|
	subs	r2, r1, #1		@ r2 = divisor - 1, Z if divisor is 1
	cmpne	r0, r1			@ divisor != 1: dividend vs divisor
	moveq	r0, #0			@ divisor 1 or equal magnitudes: 0
	tsthi	r1, r2			@ dividend > divisor: power of 2 ?
	andeq	r0, r0, r2		@ yes: keep dividend & (divisor - 1)
	bls	20f			@ remainder already in r0

	ARM_MOD_BODY r0, r1, r2, r3

	@ Give the remainder the sign of the original dividend.
20:	cmp	ip, #0
	rsbmi	r0, r0, #0
	ret	lr

UNWIND(.fnend)
ENDPROC(__modsi3)
.popsection
- #ifdef CONFIG_AEABI
/*
 * __aeabi_uidivmod: unsigned 32-bit division with remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient, r1 = remainder
 * Uses: r2, r3 plus whatever __aeabi_uidiv uses
 *
 * The quotient comes from __aeabi_uidiv; the remainder is then rebuilt
 * as dividend - quotient * divisor from the operands saved on the stack.
 */
.pushsection .text.__aeabi_uidivmod, "ax"
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}	@ keep dividend, divisor, ip and lr
	bl	__aeabi_uidiv		@ r0 = quotient
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2		@ r3 = quotient * divisor
	sub	r1, r1, r3		@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
.popsection
/*
 * __aeabi_idivmod: signed 32-bit division with remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient, r1 = remainder
 * Uses: r2, r3 plus whatever __aeabi_idiv uses
 *
 * The quotient comes from __aeabi_idiv; the remainder is then rebuilt
 * as dividend - quotient * divisor from the operands saved on the stack.
 *
 * The function lives in its own .text.__aeabi_idivmod section (it used
 * to share the section name of __aeabi_uidivmod), so that the linker can
 * keep or discard the two helpers independently.
 */
.pushsection .text.__aeabi_idivmod, "ax"
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr}	)

	stmfd	sp!, {r0, r1, ip, lr}	@ keep dividend, divisor, ip and lr
	bl	__aeabi_idiv		@ r0 = quotient
	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2		@ r3 = quotient * divisor
	sub	r1, r1, r3		@ r1 = remainder
	ret	lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
.popsection
- #endif
/*
 * Ldiv0: common landing point for division by zero.
 *
 * The division routines in this file branch here, so lr still holds the
 * return address of their caller.  __div0 is called, after which 0 is
 * returned in r0 to that caller.
 */
.pushsection .text.Ldiv0, "ax"
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})

	str	lr, [sp, #-8]!		@ save lr in an 8 byte stack slot
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ drop the slot and return

UNWIND(.fnend)
ENDPROC(Ldiv0)
.popsection
- /* Thumb-1 specialities */
- #if defined(CONFIG_SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2)
/*
 * __gnu_thumb1_case_sqi: table dispatch on signed byte entries.
 *
 * In:  r0 = index, lr = return address; with bit 0 cleared it is used
 *      as the base of a table of signed bytes
 * Out: returns to lr + 2 * table[r0]
 *
 * r1 is saved and restored around the body.
 */
.pushsection .text.__gnu_thumb1_case_sqi, "ax"
ENTRY(__gnu_thumb1_case_sqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ this shift pair clears bit 0
	lsls	r1, r1, #1
	ldrsb	r1, [r1, r0]		@ r1 = signed byte entry
	lsls	r1, r1, #1		@ entry * 2
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_sqi)
.popsection
/*
 * __gnu_thumb1_case_uqi: table dispatch on unsigned byte entries.
 *
 * In:  r0 = index, lr = return address; with bit 0 cleared it is used
 *      as the base of a table of unsigned bytes
 * Out: returns to lr + 2 * table[r0]
 *
 * r1 is saved and restored around the body.
 */
.pushsection .text.__gnu_thumb1_case_uqi, "ax"
ENTRY(__gnu_thumb1_case_uqi)
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1		@ this shift pair clears bit 0
	lsls	r1, r1, #1
	ldrb	r1, [r1, r0]		@ r1 = unsigned byte entry
	lsls	r1, r1, #1		@ entry * 2
	add	lr, lr, r1
	pop	{r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uqi)
.popsection
/*
 * __gnu_thumb1_case_shi: table dispatch on signed halfword entries.
 *
 * In:  r0 = index, lr = return address; with bit 0 cleared it is used
 *      as the base of a table of signed halfwords
 * Out: returns to lr + 2 * table[r0]
 *
 * r0 and r1 are saved and restored around the body.
 */
.pushsection .text.__gnu_thumb1_case_shi, "ax"
ENTRY(__gnu_thumb1_case_shi)
	push	{r0, r1}
	lsls	r0, r0, #1		@ index to byte offset
	mov	r1, lr
	lsrs	r1, r1, #1		@ this shift pair clears bit 0
	lsls	r1, r1, #1
	ldrsh	r1, [r1, r0]		@ r1 = signed halfword entry
	lsls	r1, r1, #1		@ entry * 2
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_shi)
.popsection
/*
 * __gnu_thumb1_case_uhi: table dispatch on unsigned halfword entries.
 *
 * In:  r0 = index, lr = return address; with bit 0 cleared it is used
 *      as the base of a table of unsigned halfwords
 * Out: returns to lr + 2 * table[r0]
 *
 * r0 and r1 are saved and restored around the body.
 */
.pushsection .text.__gnu_thumb1_case_uhi, "ax"
ENTRY(__gnu_thumb1_case_uhi)
	push	{r0, r1}
	lsls	r0, r0, #1		@ index to byte offset
	mov	r1, lr
	lsrs	r1, r1, #1		@ this shift pair clears bit 0
	lsls	r1, r1, #1
	ldrh	r1, [r1, r0]		@ r1 = unsigned halfword entry
	lsls	r1, r1, #1		@ entry * 2
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
ENDPROC(__gnu_thumb1_case_uhi)
.popsection
- #endif
|