]> git.kernelconcepts.de Git - karo-tx-uboot.git/blobdiff - arch/sh/lib/udivsi3_i4i-Os.S
sh: import missing private libraries from Linux 3.19
[karo-tx-uboot.git] / arch / sh / lib / udivsi3_i4i-Os.S
diff --git a/arch/sh/lib/udivsi3_i4i-Os.S b/arch/sh/lib/udivsi3_i4i-Os.S
new file mode 100644 (file)
index 0000000..54988ee
--- /dev/null
@@ -0,0 +1,128 @@
+/* Copyright (C) 2006 Free Software Foundation, Inc.
+
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+/* Moderately Space-optimized libgcc routines for the Renesas SH /
+   STMicroelectronics ST40 CPUs.
+   Contributed by J"orn Rennecke joern.rennecke@st.com.  */
+
+/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
+   sh4-200 run times:
+   udiv small divisor: 55 cycles
+   udiv large divisor: 52 cycles
+   sdiv small divisor, positive result: 59 cycles
+   sdiv large divisor, positive result: 56 cycles
+   sdiv small divisor, negative result: 65 cycles (*)
+   sdiv large divisor, negative result: 62 cycles (*)
+   (*): r2 is restored in the delay slot of the final jmp and has a
+        lingering latency of two more cycles.  */
+       .balign 4
+       .global __udivsi3_i4i
+       .global __udivsi3_i4
+       .set    __udivsi3_i4, __udivsi3_i4i  /* __udivsi3_i4 is an alias for the same entry point */
+       .type   __udivsi3_i4i, @function
+       .type   __sdivsi3_i4i, @function
+__udivsi3_i4i:  /* unsigned divide r4 / r5, quotient left in r0; r4 and r5 are restored from the stack, r1 and T/Q/M are overwritten */
+       sts pr,r1  /* r1 = return address; PR itself is overwritten by the bsr calls below */
+       mov.l r4,@-r15  /* push dividend so it can be restored on exit */
+       extu.w r5,r0  /* r0 = low 16 bits of divisor, zero-extended */
+       cmp/eq r5,r0  /* T = 1 iff the divisor fits in 16 bits */
+       swap.w r4,r0  /* r0 = dividend with its two 16-bit halves exchanged */
+       shlr16 r4  /* r4 = upper 16 bits of dividend */
+       bf/s large_divisor  /* T == 0: divisor > 0xffff (delayed branch) */
+       div0u  /* delay slot: clear M, Q and T for unsigned div1 stepping */
+       mov.l r5,@-r15  /* push divisor; r5 is shifted below */
+       shll16 r5  /* r5 = divisor << 16 */
+sdiv_small_divisor:  /* also branched to from __sdivsi3_i4i, with r0/r4/r5 prepared the same way and both operands already pushed */
+       div1 r5,r4  /* first pass: 16 div1 steps in total (1 + 1 + 6, done twice) */
+       bsr div6
+       div1 r5,r4  /* delay slot */
+       div1 r5,r4
+       bsr div6
+       div1 r5,r4  /* delay slot */
+       xtrct r4,r0  /* r0 = (r4 << 16) | (r0 >> 16) */
+       xtrct r0,r4  /* r4 = (r0 << 16) | (r4 >> 16) */
+       bsr div7  /* second pass: 16 div1 steps in total (7 + 1 + 1 + 7) */
+       swap.w r4,r4  /* delay slot: exchange the 16-bit halves of r4 */
+       div1 r5,r4
+       bsr div7
+       div1 r5,r4  /* delay slot */
+       xtrct r4,r0  /* r0 = (r4 << 16) | (r0 >> 16) */
+       mov.l @r15+,r5  /* restore the r5 value pushed earlier */
+       swap.w r0,r0  /* exchange the 16-bit halves of r0 */
+       mov.l @r15+,r4  /* restore the r4 value pushed earlier */
+       jmp @r1  /* leave through r1: saved PR, or negate_result when __sdivsi3_i4i set it so */
+       rotcl r0  /* delay slot: rotate T (result of the last div1) into bit 0 of r0 */
+div7:  /* seven div1 steps: one here, then falls through into div6 */
+       div1 r5,r4
+div6:  /* six div1 steps, then return to the bsr caller */
+                   div1 r5,r4; div1 r5,r4; div1 r5,r4
+       div1 r5,r4; div1 r5,r4; rts;        div1 r5,r4  /* the last div1 sits in the rts delay slot */
+
+divx3:  /* three (rotcl r0, div1) pairs; called with bsr from the .rept loop under sdiv_large_divisor */
+       rotcl r0  /* rotate r0 left through T: old T -> bit 0, old bit 31 -> T */
+       div1 r5,r4  /* one division step of r4 by r5; result bit is left in T */
+       rotcl r0
+       div1 r5,r4
+       rotcl r0
+       rts
+       div1 r5,r4  /* delay slot: third div1 runs before control returns */
+
+large_divisor:  /* __udivsi3_i4i path taken when the divisor does not fit in 16 bits */
+       mov.l r5,@-r15  /* push divisor so the two pops on exit balance */
+sdiv_large_divisor:  /* also branched to from __sdivsi3_i4i, which has already pushed r4 and r5 */
+       xor r4,r0  /* r0 ^= r4: r0 holds the half-swapped dividend and r4 its upper half, so r0's low half becomes 0 */
+       .rept 4  /* 4 repetitions x (1 + 3 in divx3) = 16 rotcl/div1 pairs */
+       rotcl r0
+       bsr divx3
+       div1 r5,r4  /* delay slot: runs before divx3 */
+       .endr
+       mov.l @r15+,r5  /* restore the r5 value pushed earlier */
+       mov.l @r15+,r4  /* restore the r4 value pushed earlier */
+       jmp @r1  /* leave through r1: saved PR, or negate_result when __sdivsi3_i4i set it so */
+       rotcl r0  /* delay slot: rotate the final T into bit 0 of r0 */
+
+       .global __sdivsi3_i4i
+       .global __sdivsi3_i4
+       .global __sdivsi3
+       .set    __sdivsi3_i4, __sdivsi3_i4i  /* both aliases resolve to the same entry point */
+       .set    __sdivsi3, __sdivsi3_i4i
+__sdivsi3_i4i:  /* signed divide r4 / r5, quotient in r0: negates negative operands, reuses the unsigned code, negates the result if signs differ */
+       mov.l r4,@-r15  /* push original dividend */
+       cmp/pz r5  /* T = (divisor >= 0) */
+       mov.l r5,@-r15  /* push original divisor */
+       bt/s pos_divisor
+       cmp/pz r4  /* delay slot: T = (dividend >= 0), executed on both paths */
+       neg r5,r5  /* divisor was negative: r5 = -r5 */
+       extu.w r5,r0  /* r0 = low 16 bits of the negated divisor */
+       bt/s neg_result  /* dividend >= 0 with a negative divisor: result must be negated */
+       cmp/eq r5,r0  /* delay slot: T = 1 iff the negated divisor fits in 16 bits */
+       neg r4,r4  /* dividend negative as well: r4 = -r4, no final negation needed */
+pos_result:
+       swap.w r4,r0  /* r0 = dividend with its 16-bit halves exchanged, as in __udivsi3_i4i */
+       bra sdiv_check_divisor
+       sts pr,r1  /* delay slot: r1 = return address, so the shared code returns straight to the caller */
+pos_divisor:
+       extu.w r5,r0  /* r0 = low 16 bits of divisor */
+       bt/s pos_result  /* dividend >= 0 too: no sign fix-up needed */
+       cmp/eq r5,r0  /* delay slot: T = 1 iff the divisor fits in 16 bits */
+       neg r4,r4  /* dividend negative: r4 = -r4, then fall into neg_result */
+neg_result:
+       mova negate_result,r0  /* r0 = address of negate_result */
+       ;
+       mov r0,r1  /* the shared divide code ends with jmp @r1, so it will land in negate_result */
+       swap.w r4,r0  /* r0 = dividend with its 16-bit halves exchanged */
+       lds r2,macl  /* park the caller's r2 in MACL */
+       sts pr,r2  /* r2 = return address, used by negate_result */
+sdiv_check_divisor:
+       shlr16 r4  /* r4 = upper 16 bits of the (possibly negated) dividend */
+       bf/s sdiv_large_divisor  /* tests T from the earlier cmp/eq; T == 0 means the divisor needs more than 16 bits */
+       div0u  /* delay slot: clear M, Q and T for unsigned div1 stepping */
+       bra sdiv_small_divisor
+       shll16 r5  /* delay slot: r5 = divisor << 16, as the small-divisor code expects */
+       .balign 4  /* keeps negate_result 4-byte aligned; its address is taken with mova in neg_result */
+negate_result:  /* reached through jmp @r1 when neg_result stored this address in r1 */
+       neg r0,r0  /* r0 = -quotient */
+       jmp @r2  /* return to the caller: neg_result copied PR into r2 */
+       sts macl,r2  /* delay slot: restore the r2 value parked in MACL */