arch/x86/lib/copy_user_64.S

   1 /*
   2  * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
   3  * Copyright 2002 Andi Kleen, SuSE Labs.
   4  * Subject to the GNU Public License v2.
   5  *
   6  * Functions to copy from and to user space.
   7  */
   8
   9 #include <linux/linkage.h>
  10 #include <asm/current.h>
  11 #include <asm/asm-offsets.h>
  12 #include <asm/thread_info.h>
  13 #include <asm/cpufeatures.h>
  14 #include <asm/alternative-asm.h>
  15 #include <asm/asm.h>
  16 #include <asm/smap.h>
  17
  18 /* Standard copy_to_user with segment limit checking: range-check rdi+rdx, then jump to one of three copy routines (ALTERNATIVE_2 keyed on REP_GOOD / ERMS) */
  19 ENTRY(_copy_to_user)
  20         mov PER_CPU_VAR(current_task), %rax  /* rax = per-CPU current_task */
  21         movq %rdi,%rcx  /* rcx = rdi (destination start) */
  22         addq %rdx,%rcx  /* rcx = rdi + rdx (count); sets CF if the sum wrapped */
  23         jc bad_to_user  /* wrapped around: reject */
  24         cmpq TASK_addr_limit(%rax),%rcx  /* compare end address with the field at TASK_addr_limit(rax) */
  25         ja bad_to_user  /* end is above the limit (unsigned compare): reject */
  26         ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
  27                       "jmp copy_user_generic_string",           \
  28                       X86_FEATURE_REP_GOOD,                     \
  29                       "jmp copy_user_enhanced_fast_string",     \
  30                       X86_FEATURE_ERMS
  31 ENDPROC(_copy_to_user)
  32
  33 /* Standard copy_from_user with segment limit checking: range-check rsi+rdx, then jump to one of three copy routines (ALTERNATIVE_2 keyed on REP_GOOD / ERMS) */
  34 ENTRY(_copy_from_user)
  35         mov PER_CPU_VAR(current_task), %rax  /* rax = per-CPU current_task */
  36         movq %rsi,%rcx  /* rcx = rsi (source start) */
  37         addq %rdx,%rcx  /* rcx = rsi + rdx (count); sets CF if the sum wrapped */
  38         jc bad_from_user  /* wrapped around: reject (bad_from_user also zeroes the destination) */
  39         cmpq TASK_addr_limit(%rax),%rcx  /* compare end address with the field at TASK_addr_limit(rax) */
  40         ja bad_from_user  /* end is above the limit (unsigned compare): reject */
  41         ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
  42                       "jmp copy_user_generic_string",           \
  43                       X86_FEATURE_REP_GOOD,                     \
  44                       "jmp copy_user_enhanced_fast_string",     \
  45                       X86_FEATURE_ERMS
  46 ENDPROC(_copy_from_user)
  47
  48         .section .fixup,"ax"
  49         /* Range check failed: bad_from_user must zero dest (edx bytes at rdi), then falls into bad_to_user; both return eax = edx */
  50 ENTRY(bad_from_user)
  51 bad_from_user:  /* NOTE(review): same name as the ENTRY() line above; this label looks redundant - confirm */
  52         movl %edx,%ecx  /* ecx = byte count for the rep stosb below */
  53         xorl %eax,%eax  /* al = 0, the fill byte */
  54         rep
  55         stosb  /* with the rep prefix above: store al at (rdi), ecx times */
  56 bad_to_user:
  57         movl %edx,%eax  /* eax = edx: the whole count is reported as not copied */
  58         ret
  59 ENDPROC(bad_from_user)
  60         .previous
  61
  62 /*
  63  * copy_user_generic_unrolled - memory copy with exception handling.
  64  * This version is for CPUs like P4 that don't have efficient micro
  65  * code for rep movsq. Copies 64-byte blocks, then 8-byte words, then bytes.
  66  *
  67  * Input:
  68  * rdi destination
  69  * rsi source
  70  * rdx count
  71  *
  72  * Output:
  73  * eax uncopied bytes or 0 if successful.
  74  */
  75 ENTRY(copy_user_generic_unrolled)
  76         ASM_STAC  /* macro not defined in this file; presumably opens the user-access window (SMAP) - confirm */
  77         cmpl $8,%edx
  78         jb 20f          /* less than 8 bytes, go to byte copy loop */
  79         ALIGN_DESTINATION  /* macro not defined in this file; presumably byte-copies until rdi is 8-byte aligned - confirm */
  80         movl %edx,%ecx
  81         andl $63,%edx  /* edx = count % 64 (bytes left after the 64-byte blocks) */
  82         shrl $6,%ecx  /* ecx = number of 64-byte blocks */
  83         jz 17f  /* no full block: go to the 8-byte loop setup */
  84 1:      movq (%rsi),%r8  /* 1-4: load source qwords 0..3 of the block */
  85 2:      movq 1*8(%rsi),%r9
  86 3:      movq 2*8(%rsi),%r10
  87 4:      movq 3*8(%rsi),%r11
  88 5:      movq %r8,(%rdi)  /* 5-8: store them to destination qwords 0..3 */
  89 6:      movq %r9,1*8(%rdi)
  90 7:      movq %r10,2*8(%rdi)
  91 8:      movq %r11,3*8(%rdi)
  92 9:      movq 4*8(%rsi),%r8  /* 9-12: load source qwords 4..7 */
  93 10:     movq 5*8(%rsi),%r9
  94 11:     movq 6*8(%rsi),%r10
  95 12:     movq 7*8(%rsi),%r11
  96 13:     movq %r8,4*8(%rdi)  /* 13-16: store them to destination qwords 4..7 */
  97 14:     movq %r9,5*8(%rdi)
  98 15:     movq %r10,6*8(%rdi)
  99 16:     movq %r11,7*8(%rdi)
 100         leaq 64(%rsi),%rsi  /* advance source by one block */
 101         leaq 64(%rdi),%rdi  /* advance destination by one block */
 102         decl %ecx  /* block counted as done only after all 16 moves */
 103         jnz 1b
 104 17:     movl %edx,%ecx  /* ecx = bytes left (below 64 when reached from the block path) */
 105         andl $7,%edx  /* edx = bytes left after the 8-byte words */
 106         shrl $3,%ecx  /* ecx = number of 8-byte words */
 107         jz 20f
 108 18:     movq (%rsi),%r8  /* 18/19: copy one qword via r8 */
 109 19:     movq %r8,(%rdi)
 110         leaq 8(%rsi),%rsi
 111         leaq 8(%rdi),%rdi
 112         decl %ecx
 113         jnz 18b
 114 20:     andl %edx,%edx  /* edx unchanged; ZF set when no bytes remain */
 115         jz 23f
 116         movl %edx,%ecx  /* ecx = byte-loop counter */
 117 21:     movb (%rsi),%al  /* 21/22: copy one byte via al */
 118 22:     movb %al,(%rdi)
 119         incq %rsi
 120         incq %rdi
 121         decl %ecx
 122         jnz 21b
 123 23:     xor %eax,%eax  /* success: return 0 */
 124         ASM_CLAC  /* macro not defined in this file; presumably closes the user-access window (SMAP) - confirm */
 125         ret
 126
 127         .section .fixup,"ax"
 128 30:     shll $6,%ecx  /* fixup for labels 1-16: ecx = blocks not completed (incl. the current one) * 64 */
 129         addl %ecx,%edx  /* edx = those bytes + the sub-64 tail */
 130         jmp 60f
 131 40:     leal (%rdx,%rcx,8),%edx  /* fixup for labels 18-19: edx = tail bytes + remaining qwords * 8 */
 132         jmp 60f
 133 50:     movl %ecx,%edx  /* fixup for labels 21-22: edx = bytes left in the byte loop */
 134 60:     jmp copy_user_handle_tail /* ecx is zerorest also */
 135         .previous
 136
 137         _ASM_EXTABLE(1b,30b)  /* entries pair a numbered copy instruction with its fixup label above */
 138         _ASM_EXTABLE(2b,30b)
 139         _ASM_EXTABLE(3b,30b)
 140         _ASM_EXTABLE(4b,30b)
 141         _ASM_EXTABLE(5b,30b)
 142         _ASM_EXTABLE(6b,30b)
 143         _ASM_EXTABLE(7b,30b)
 144         _ASM_EXTABLE(8b,30b)
 145         _ASM_EXTABLE(9b,30b)
 146         _ASM_EXTABLE(10b,30b)
 147         _ASM_EXTABLE(11b,30b)
 148         _ASM_EXTABLE(12b,30b)
 149         _ASM_EXTABLE(13b,30b)
 150         _ASM_EXTABLE(14b,30b)
 151         _ASM_EXTABLE(15b,30b)
 152         _ASM_EXTABLE(16b,30b)
 153         _ASM_EXTABLE(18b,40b)
 154         _ASM_EXTABLE(19b,40b)
 155         _ASM_EXTABLE(21b,50b)
 156         _ASM_EXTABLE(22b,50b)
 157 ENDPROC(copy_user_generic_unrolled)
 158
 159 /* Some CPUs run faster using the string copy instructions.
 160  * This is also a lot simpler. Use them when possible.
 161  *
 162  * Only 4GB of copy is supported. This shouldn't be a problem
 163  * because the kernel normally only writes from/to page sized chunks
 164  * even if user space passed a longer buffer.
 165  * And more would be dangerous because both Intel and AMD have
 166  * errata with rep movsq > 4GB. If someone feels the need to fix
 167  * this please consider this.
 168  *
 169  * Input:
 170  * rdi destination
 171  * rsi source
 172  * rdx count
 173  *
 174  * Output:
 175  * eax uncopied bytes or 0 if successful.
 176  */
 177 ENTRY(copy_user_generic_string)
 178         ASM_STAC  /* macro not defined in this file; presumably opens the user-access window (SMAP) - confirm */
 179         cmpl $8,%edx
 180         jb 2f           /* less than 8 bytes, go to byte copy loop */
 181         ALIGN_DESTINATION  /* macro not defined in this file; presumably byte-copies until rdi is 8-byte aligned - confirm */
 182         movl %edx,%ecx
 183         shrl $3,%ecx  /* ecx = number of 8-byte words */
 184         andl $7,%edx  /* edx = leftover bytes (count % 8) */
 185 1:      rep
 186         movsq  /* with the rep prefix above: copy ecx qwords from (rsi) to (rdi) */
 187 2:      movl %edx,%ecx  /* ecx = leftover byte count (the whole count when entered via jb 2f) */
 188 3:      rep
 189         movsb  /* with the rep prefix above: copy ecx bytes */
 190         xorl %eax,%eax  /* success: return 0 */
 191         ASM_CLAC  /* macro not defined in this file; presumably closes the user-access window (SMAP) - confirm */
 192         ret
 193
 194         .section .fixup,"ax"
 195 11:     leal (%rdx,%rcx,8),%ecx  /* fixup for label 1: ecx = leftover bytes + remaining qwords * 8 */
 196 12:     movl %ecx,%edx          /* ecx is zerorest also */
 197         jmp copy_user_handle_tail  /* entered with edx = ecx = bytes left */
 198         .previous
 199
 200         _ASM_EXTABLE(1b,11b)  /* rep movsq at label 1 is paired with fixup 11 */
 201         _ASM_EXTABLE(3b,12b)  /* rep movsb at label 3 is paired with fixup 12 */
 202 ENDPROC(copy_user_generic_string)
 203
 204 /*
 205  * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 206  * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 207  *
 208  * Input:
 209  * rdi destination
 210  * rsi source
 211  * rdx count
 212  *
 213  * Output:
 214  * eax uncopied bytes or 0 if successful.
 215  */
 216 ENTRY(copy_user_enhanced_fast_string)
 217         ASM_STAC  /* macro not defined in this file; presumably opens the user-access window (SMAP) - confirm */
 218         movl %edx,%ecx  /* ecx = byte count (low 32 bits of rdx) */
 219 1:      rep
 220         movsb  /* with the rep prefix above: copy ecx bytes from (rsi) to (rdi) */
 221         xorl %eax,%eax  /* success: return 0 */
 222         ASM_CLAC  /* macro not defined in this file; presumably closes the user-access window (SMAP) - confirm */
 223         ret
 224
 225         .section .fixup,"ax"
 226 12:     movl %ecx,%edx          /* ecx is zerorest also */
 227         jmp copy_user_handle_tail  /* entered with edx = ecx = bytes left */
 228         .previous
 229
 230         _ASM_EXTABLE(1b,12b)  /* rep movsb at label 1 is paired with fixup 12 */
 231 ENDPROC(copy_user_enhanced_fast_string)
 232
 233 /*
 234  * __copy_user_nocache - Uncached memory copy with exception handling
 235  * This will force destination out of cache for more performance.
 236  * Uses rdi (destination), rsi (source), edx (count); eax is 0 on the success path.
 237  * Note: Cached memory copy is used when destination or size is not
 238  * naturally aligned. That is:
 239  *  - Require 8-byte alignment when size is 8 bytes or larger.
 240  *  - Require 4-byte alignment when size is 4 bytes.
 241  */
 242 ENTRY(__copy_user_nocache)
 243         ASM_STAC  /* macro not defined in this file; presumably opens the user-access window (SMAP) - confirm */
 244
 245         /* If size is less than 8 bytes, go to 4-byte copy */
 246         cmpl $8,%edx
 247         jb .L_4b_nocache_copy_entry
 248
 249         /* If destination is not 8-byte aligned, "cache" copy to align it */
 250         ALIGN_DESTINATION
 251
 252         /* Set 4x8-byte copy count and remainder */
 253         movl %edx,%ecx
 254         andl $63,%edx  /* edx = count % 64 */
 255         shrl $6,%ecx  /* ecx = number of 64-byte blocks */
 256         jz .L_8b_nocache_copy_entry     /* jump if count is 0 */
 257
 258         /* Perform 4x8-byte nocache loop-copy */
 259 .L_4x8b_nocache_copy_loop:
 260 1:      movq (%rsi),%r8  /* 1-4: ordinary loads of source qwords 0..3 */
 261 2:      movq 1*8(%rsi),%r9
 262 3:      movq 2*8(%rsi),%r10
 263 4:      movq 3*8(%rsi),%r11
 264 5:      movnti %r8,(%rdi)  /* 5-8: movnti stores to destination qwords 0..3 */
 265 6:      movnti %r9,1*8(%rdi)
 266 7:      movnti %r10,2*8(%rdi)
 267 8:      movnti %r11,3*8(%rdi)
 268 9:      movq 4*8(%rsi),%r8  /* 9-12: load source qwords 4..7 */
 269 10:     movq 5*8(%rsi),%r9
 270 11:     movq 6*8(%rsi),%r10
 271 12:     movq 7*8(%rsi),%r11
 272 13:     movnti %r8,4*8(%rdi)  /* 13-16: movnti stores to destination qwords 4..7 */
 273 14:     movnti %r9,5*8(%rdi)
 274 15:     movnti %r10,6*8(%rdi)
 275 16:     movnti %r11,7*8(%rdi)
 276         leaq 64(%rsi),%rsi
 277         leaq 64(%rdi),%rdi
 278         decl %ecx  /* block counted as done only after all 16 moves */
 279         jnz .L_4x8b_nocache_copy_loop
 280
 281         /* Set 8-byte copy count and remainder */
 282 .L_8b_nocache_copy_entry:
 283         movl %edx,%ecx
 284         andl $7,%edx  /* edx = bytes left after the 8-byte words */
 285         shrl $3,%ecx  /* ecx = number of 8-byte words */
 286         jz .L_4b_nocache_copy_entry     /* jump if count is 0 */
 287
 288         /* Perform 8-byte nocache loop-copy */
 289 .L_8b_nocache_copy_loop:
 290 20:     movq (%rsi),%r8
 291 21:     movnti %r8,(%rdi)
 292         leaq 8(%rsi),%rsi
 293         leaq 8(%rdi),%rdi
 294         decl %ecx
 295         jnz .L_8b_nocache_copy_loop
 296
 297         /* If no byte left, we're done */
 298 .L_4b_nocache_copy_entry:
 299         andl %edx,%edx  /* edx unchanged; ZF set when nothing remains */
 300         jz .L_finish_copy
 301
 302         /* If destination is not 4-byte aligned, go to byte copy: */
 303         movl %edi,%ecx
 304         andl $3,%ecx  /* ecx = low two bits of the destination address */
 305         jnz .L_1b_cache_copy_entry
 306
 307         /* Set 4-byte copy count (1 or 0) and remainder */
 308         movl %edx,%ecx
 309         andl $3,%edx  /* edx = bytes left after the 4-byte word */
 310         shrl $2,%ecx  /* ecx = number of 4-byte words */
 311         jz .L_1b_cache_copy_entry       /* jump if count is 0 */
 312
 313         /* Perform 4-byte nocache copy: */
 314 30:     movl (%rsi),%r8d
 315 31:     movnti %r8d,(%rdi)
 316         leaq 4(%rsi),%rsi
 317         leaq 4(%rdi),%rdi
 318
 319         /* If no bytes left, we're done: */
 320         andl %edx,%edx
 321         jz .L_finish_copy
 322
 323         /* Perform byte "cache" loop-copy for the remainder */
 324 .L_1b_cache_copy_entry:
 325         movl %edx,%ecx  /* ecx = byte-loop counter (edx is non-zero on every path here) */
 326 .L_1b_cache_copy_loop:
 327 40:     movb (%rsi),%al
 328 41:     movb %al,(%rdi)
 329         incq %rsi
 330         incq %rdi
 331         decl %ecx
 332         jnz .L_1b_cache_copy_loop
 333
 334         /* Finished copying; fence the prior stores */
 335 .L_finish_copy:
 336         xorl %eax,%eax  /* success: return 0 */
 337         ASM_CLAC  /* macro not defined in this file; presumably closes the user-access window (SMAP) - confirm */
 338         sfence
 339         ret
 340
 341         .section .fixup,"ax"
 342 .L_fixup_4x8b_copy:
 343         shll $6,%ecx  /* ecx = blocks not completed (incl. the current one) * 64 */
 344         addl %ecx,%edx  /* edx = those bytes + the sub-64 tail */
 345         jmp .L_fixup_handle_tail
 346 .L_fixup_8b_copy:
 347         lea (%rdx,%rcx,8),%rdx  /* rdx = tail bytes + remaining qwords * 8 */
 348         jmp .L_fixup_handle_tail
 349 .L_fixup_4b_copy:
 350         lea (%rdx,%rcx,4),%rdx  /* rdx = tail bytes + remaining dwords * 4 */
 351         jmp .L_fixup_handle_tail
 352 .L_fixup_1b_copy:
 353         movl %ecx,%edx  /* edx = bytes left in the byte loop */
 354 .L_fixup_handle_tail:
 355         sfence  /* fence the stores already issued before handing off */
 356         jmp copy_user_handle_tail  /* entered with edx = bytes left */
 357         .previous
 358
 359         _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)  /* entries pair a numbered copy instruction with its fixup label above */
 360         _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
 361         _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
 362         _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
 363         _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
 364         _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
 365         _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
 366         _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
 367         _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
 368         _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
 369         _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
 370         _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
 371         _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
 372         _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
 373         _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
 374         _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
 375         _ASM_EXTABLE(20b,.L_fixup_8b_copy)
 376         _ASM_EXTABLE(21b,.L_fixup_8b_copy)
 377         _ASM_EXTABLE(30b,.L_fixup_4b_copy)
 378         _ASM_EXTABLE(31b,.L_fixup_4b_copy)
 379         _ASM_EXTABLE(40b,.L_fixup_1b_copy)
 380         _ASM_EXTABLE(41b,.L_fixup_1b_copy)
 381 ENDPROC(__copy_user_nocache)