/*
 * Copyright 2010, Google Inc.
 *
 * Brought in from coreboot uldivmod.S
 *
 * SPDX-License-Identifier: GPL-2.0
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

/*
 * A, Q = r0 + (r1 << 32)
 * B, R = r2 + (r3 << 32)
 * A / B = Q ... R
 */
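
/*
 * In C terms, for unsigned 64-bit A and B:
 *
 *	Q = A / B, returned in r0 (low) and r1 (high)
 *	R = A % B, returned in r2 (low) and r3 (high)
 *
 * per the __aeabi_uldivmod contract in the ARM run-time ABI.
 */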
A_0	.req	r0
A_1	.req	r1
B_0	.req	r2
B_1	.req	r3
C_0	.req	r4
C_1	.req	r5
D_0	.req	r6
D_1	.req	r7

Q_0	.req	r0
Q_1	.req	r1
R_0	.req	r2
R_1	.req	r3

THUMB(
TMP	.req	r8
)
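
/*
 * Thumb-2 data-processing instructions cannot take a register-shifted
 * register as their second operand, so the ARM-state
 * "orr Rd, Rn, Rm, lsr Rs" sequences below need TMP as a scratch
 * register when building for Thumb.
 */
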
.pushsection .text.__aeabi_uldivmod, "ax"
ENTRY(__aeabi_uldivmod)
	stmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) lr}
	@ Test if B == 0
	orrs	ip, B_0, B_1		@ Z set -> B == 0
	beq	L_div_by_0
	@ Test if B is power of 2: (B & (B - 1)) == 0
	subs	C_0, B_0, #1
	sbc	C_1, B_1, #0
	tst	C_0, B_0
	tsteq	B_1, C_1
	beq	L_pow2
	@ Test if A_1 == B_1 == 0
	orrs	ip, A_1, B_1
	beq	L_div_32_32
L_div_64_64:
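	/*
	 * Full 64/64 case: binary shift-and-subtract long division.
	 * B is shifted left to line up with A, C holds the quotient bit
	 * that the shifted B represents, and D accumulates the quotient.
	 */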
/* CLZ only exists in ARM architecture version 5 and above. */
#ifdef HAVE_CLZ
	mov	C_0, #1
	mov	C_1, #0
	@ D_0 = clz A
	teq	A_1, #0
	clz	D_0, A_1
	clzeq	ip, A_0
	addeq	D_0, D_0, ip
	@ D_1 = clz B
	teq	B_1, #0
	clz	D_1, B_1
	clzeq	ip, B_0
	addeq	D_1, D_1, ip
	@ if clz B - clz A > 0
	subs	D_0, D_1, D_0
	bls	L_done_shift
	@ B <<= (clz B - clz A)
	subs	D_1, D_0, #32
	rsb	ip, D_0, #32
	movmi	B_1, B_1, lsl D_0
ARM(	orrmi	B_1, B_1, B_0, lsr ip	)
THUMB(	lsrmi	TMP, B_0, ip		)
THUMB(	orrmi	B_1, B_1, TMP		)
	movpl	B_1, B_0, lsl D_1
	mov	B_0, B_0, lsl D_0
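	@ (D_1 = shift - 32 from the subs above: for shifts of 32 or
	@ more the "pl" path moves the low word into the high word, and
	@ lsl by D_0 >= 32 clears the low word)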
	@ C = 1 << (clz B - clz A)
	movmi	C_1, C_1, lsl D_0
ARM(	orrmi	C_1, C_1, C_0, lsr ip	)
THUMB(	lsrmi	TMP, C_0, ip		)
THUMB(	orrmi	C_1, C_1, TMP		)
	movpl	C_1, C_0, lsl D_1
	mov	C_0, C_0, lsl D_0
L_done_shift:
	mov	D_0, #0
	mov	D_1, #0
	@ C: current bit; D: result
#else
	@ C: current bit; D: result
	mov	C_0, #1
	mov	C_1, #0
	mov	D_0, #0
	mov	D_1, #0
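	@ Without CLZ, normalize by shifting B and C left four bits at a
	@ time, then one bit at a time, until a further shift would
	@ overflow B or B has reached A.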
L_lsl_4:
	cmp	B_1, #0x10000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_lsl_1
	@ B <<= 4
	mov	B_1, B_1, lsl #4
	orr	B_1, B_1, B_0, lsr #28
	mov	B_0, B_0, lsl #4
	@ C <<= 4
	mov	C_1, C_1, lsl #4
	orr	C_1, C_1, C_0, lsr #28
	mov	C_0, C_0, lsl #4
	b	L_lsl_4
L_lsl_1:
	cmp	B_1, #0x80000000
	cmpcc	B_1, A_1
	cmpeq	B_0, A_0
	bcs	L_subtract
	@ B <<= 1
	mov	B_1, B_1, lsl #1
	orr	B_1, B_1, B_0, lsr #31
	mov	B_0, B_0, lsl #1
	@ C <<= 1
	mov	C_1, C_1, lsl #1
	orr	C_1, C_1, C_0, lsr #31
	mov	C_0, C_0, lsl #1
	b	L_lsl_1
#endif
L_subtract:
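	@ Loop invariant: original A = D * original B + A,
	@ with the shifted B equal to original B * C.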
	@ if A >= B
	cmp	A_1, B_1
	cmpeq	A_0, B_0
	bcc	L_update
	@ A -= B
	subs	A_0, A_0, B_0
	sbc	A_1, A_1, B_1
	@ D |= C
	orr	D_0, D_0, C_0
	orr	D_1, D_1, C_1
L_update:
	@ if A == 0: break
	orrs	ip, A_1, A_0
	beq	L_exit
	@ C >>= 1
	movs	C_1, C_1, lsr #1
	movs	C_0, C_0, rrx
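	@ (lsr leaves C_1's old bit 0 in the carry flag; rrx rotates it
	@ into C_0, completing a 64-bit right shift by one)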
	@ if C == 0: break
	orrs	ip, C_1, C_0
	beq	L_exit
	@ B >>= 1
	movs	B_1, B_1, lsr #1
	mov	B_0, B_0, rrx
	b	L_subtract
L_exit:
	@ Note: A, B & Q, R are aliases
	mov	R_0, A_0
	mov	R_1, A_1
	mov	Q_0, D_0
	mov	Q_1, D_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
L_div_32_32:
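	@ Both high words are zero, so defer to the 32-bit helper:
	@ __aeabi_uidivmod returns the quotient in r0, remainder in r1.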
	@ Note: A_0 and Q_0 alias r0; Q_1 aliases r1
	mov	r1, B_0
	bl	__aeabi_uidivmod
	mov	R_0, r1
	mov	R_1, #0
	mov	Q_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
L_pow2:
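	@ C still holds B - 1 from the power-of-two test above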
#ifdef HAVE_CLZ
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	clz	D_0, B_0
	add	D_1, D_0, #1
	rsbs	D_0, D_0, #31
	bpl	L_1
	clz	D_0, B_1
	rsb	D_0, D_0, #31
	mov	A_0, A_1, lsr D_0
	add	D_0, D_0, #32
L_1:
	movpl	A_0, A_0, lsr D_0
ARM(	orrpl	A_0, A_0, A_1, lsl D_1	)
THUMB(	lslpl	TMP, A_1, D_1		)
THUMB(	orrpl	A_0, A_0, TMP		)
	mov	A_1, A_1, lsr D_0
	@ Move C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#else
	@ Note: A, B and Q, R are aliases
	@ R = A & (B - 1)
	and	C_0, A_0, C_0
	and	C_1, A_1, C_1
	@ Q = A >> log2(B)
	@ Note: B must not be 0 here!
	@ Compute log2(B) by counting B's trailing zeroes.
	mov	D_0, #0
	orrs	B_0, B_0, B_0
	@ If B's low word is clear (B >= 1 << 32), divide A and B by 1 << 32.
	moveq	A_0, A_1
	moveq	A_1, #0
	moveq	B_0, B_1
	@ Count the remaining trailing zeroes (B_1 is dead, reused as scratch).
	movs	B_1, B_0, lsl #16
	addeq	D_0, #16
	moveq	B_0, B_0, lsr #16
	tst	B_0, #0xff
	addeq	D_0, #8
	moveq	B_0, B_0, lsr #8
	tst	B_0, #0xf
	addeq	D_0, #4
	moveq	B_0, B_0, lsr #4
	tst	B_0, #0x3
	addeq	D_0, #2
	moveq	B_0, B_0, lsr #2
	tst	B_0, #0x1
	addeq	D_0, #1
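	@ D_0 now holds log2(B)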
	@ Shift A to the right by the appropriate amount.
	rsb	D_1, D_0, #32
	mov	Q_0, A_0, lsr D_0
ARM(	orr	Q_0, Q_0, A_1, lsl D_1	)
THUMB(	lsl	TMP, A_1, D_1		)
THUMB(	orr	Q_0, TMP		)
	mov	Q_1, A_1, lsr D_0
	@ Move C to R
	mov	R_0, C_0
	mov	R_1, C_1
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
#endif
L_div_by_0:
	bl	__div0
	@ As wrong as it could be
	mov	Q_0, #0
	mov	Q_1, #0
	mov	R_0, #0
	mov	R_1, #0
	ldmfd	sp!, {r4, r5, r6, r7, THUMB(TMP,) pc}
ENDPROC(__aeabi_uldivmod)
.popsection