arch/x86/lib/copy_user_64.S

   1 /*
   2  * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
   3  * Copyright 2002 Andi Kleen, SuSE Labs.
   4  * Subject to the GNU Public License v2.
   5  *
   6  * Functions to copy from and to user space.
   7  */
   8
   9 #include <linux/linkage.h>
  10 #include <asm/current.h>
  11 #include <asm/asm-offsets.h>
  12 #include <asm/thread_info.h>
  13 #include <asm/cpufeatures.h>
  14 #include <asm/alternative-asm.h>
  15 #include <asm/asm.h>
  16 #include <asm/smap.h>
  17 #include <asm/export.h>
  18
  19 /* Standard copy_to_user with segment limit checking: range-check the destination, then jump to a copy routine */
  20 ENTRY(_copy_to_user)                            /* in: rdi = destination, rsi = source, rdx = count */
  21         mov PER_CPU_VAR(current_task), %rax     /* rax = current_task, read as a per-CPU variable */
  22         movq %rdi,%rcx
  23         addq %rdx,%rcx                          /* rcx = destination + count (end address) */
  24         jc bad_to_user                          /* the addition carried (address wrap): reject */
  25         cmpq TASK_addr_limit(%rax),%rcx         /* compare end address with the limit stored in the task */
  26         ja bad_to_user                          /* end above the limit: reject; otherwise take one of the jumps below */
  27         ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
  28                       "jmp copy_user_generic_string",           \
  29                       X86_FEATURE_REP_GOOD,                     \
  30                       "jmp copy_user_enhanced_fast_string",     \
  31                       X86_FEATURE_ERMS
  32 ENDPROC(_copy_to_user)
  33 EXPORT_SYMBOL(_copy_to_user)
  34
  35 /* Standard copy_from_user with segment limit checking: range-check the source, then jump to a copy routine */
  36 ENTRY(_copy_from_user)                          /* in: rdi = destination, rsi = source, rdx = count */
  37         mov PER_CPU_VAR(current_task), %rax     /* rax = current_task, read as a per-CPU variable */
  38         movq %rsi,%rcx
  39         addq %rdx,%rcx                          /* rcx = source + count (end address) */
  40         jc bad_from_user                        /* the addition carried (address wrap): reject, zeroing the destination */
  41         cmpq TASK_addr_limit(%rax),%rcx         /* compare end address with the limit stored in the task */
  42         ja bad_from_user                        /* end above the limit: reject; otherwise take one of the jumps below */
  43         ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
  44                       "jmp copy_user_generic_string",           \
  45                       X86_FEATURE_REP_GOOD,                     \
  46                       "jmp copy_user_enhanced_fast_string",     \
  47                       X86_FEATURE_ERMS
  48 ENDPROC(_copy_from_user)
  49 EXPORT_SYMBOL(_copy_from_user)
  50
  51
  52         .section .fixup,"ax"
  53         /* Range-check failure paths. bad_from_user must zero dest first; both return the count as uncopied. */
  54 ENTRY(bad_from_user)
  55 bad_from_user:                                  /* NOTE(review): ENTRY() above presumably defines this label already - confirm this line is needed */
  56         movl %edx,%ecx                          /* ecx = count (low 32 bits of rdx) for the rep below */
  57         xorl %eax,%eax                          /* al = 0: the fill byte */
  58         rep
  59         stosb                                   /* write ecx zero bytes starting at (%rdi) */
  60 bad_to_user:                                    /* entered directly by _copy_to_user; bad_from_user falls through */
  61         movl %edx,%eax                          /* eax = count: nothing was copied */
  62         ret
  63 ENDPROC(bad_from_user)
  64         .previous
  65
  66 /*
  67  * copy_user_generic_unrolled - memory copy with exception handling.
  68  * This version is for CPUs like P4 that don't have efficient micro
  69  * code for rep movsq. It copies 64-byte blocks, then 8-byte words, then bytes.
  70  *
  71  * Input:
  72  * rdi destination
  73  * rsi source
  74  * rdx count (the code below only reads the low 32 bits, edx)
  75  *
  76  * Output:
  77  * eax uncopied bytes or 0 if successful.
  78  */
  79 ENTRY(copy_user_generic_unrolled)
  80         ASM_STAC                        /* macro from outside this file; presumably opens the user-access window (SMAP) - confirm */
  81         cmpl $8,%edx
  82         jb 20f          /* less than 8 bytes, go to byte copy loop */
  83         ALIGN_DESTINATION               /* macro defined outside this file; by its name it aligns the destination - confirm */
  84         movl %edx,%ecx
  85         andl $63,%edx                   /* edx = count % 64: bytes left after the 64-byte loop */
  86         shrl $6,%ecx                    /* ecx = count / 64: number of 64-byte blocks */
  87         jz 17f                          /* no full 64-byte block */
  88 1:      movq (%rsi),%r8                 /* 64-byte block: load bytes 0-31 into r8-r11 ... */
  89 2:      movq 1*8(%rsi),%r9
  90 3:      movq 2*8(%rsi),%r10
  91 4:      movq 3*8(%rsi),%r11
  92 5:      movq %r8,(%rdi)                 /* ... and store them */
  93 6:      movq %r9,1*8(%rdi)
  94 7:      movq %r10,2*8(%rdi)
  95 8:      movq %r11,3*8(%rdi)
  96 9:      movq 4*8(%rsi),%r8              /* load bytes 32-63 ... */
  97 10:     movq 5*8(%rsi),%r9
  98 11:     movq 6*8(%rsi),%r10
  99 12:     movq 7*8(%rsi),%r11
 100 13:     movq %r8,4*8(%rdi)              /* ... and store them */
 101 14:     movq %r9,5*8(%rdi)
 102 15:     movq %r10,6*8(%rdi)
 103 16:     movq %r11,7*8(%rdi)
 104         leaq 64(%rsi),%rsi              /* rsi, rdi and ecx change only after a whole block, */
 105         leaq 64(%rdi),%rdi              /* so fixup 30 counts a faulting block as fully uncopied */
 106         decl %ecx
 107         jnz 1b                          /* next 64-byte block */
 108 17:     movl %edx,%ecx
 109         andl $7,%edx                    /* edx = trailing bytes (remainder % 8) */
 110         shrl $3,%ecx                    /* ecx = number of remaining 8-byte words */
 111         jz 20f
 112 18:     movq (%rsi),%r8                 /* 8-byte word loop */
 113 19:     movq %r8,(%rdi)
 114         leaq 8(%rsi),%rsi
 115         leaq 8(%rdi),%rdi
 116         decl %ecx
 117         jnz 18b
 118 20:     andl %edx,%edx                  /* sets ZF when no bytes remain */
 119         jz 23f
 120         movl %edx,%ecx                  /* ecx = byte count, nonzero here */
 121 21:     movb (%rsi),%al                 /* byte loop */
 122 22:     movb %al,(%rdi)
 123         incq %rsi
 124         incq %rdi
 125         decl %ecx
 126         jnz 21b
 127 23:     xor %eax,%eax                   /* success: 0 uncopied bytes */
 128         ASM_CLAC                        /* counterpart of ASM_STAC above */
 129         ret
 130
 131         .section .fixup,"ax"
 132 30:     shll $6,%ecx                    /* fault in 64-byte loop: bytes left = ecx * 64 + edx */
 133         addl %ecx,%edx
 134         jmp 60f
 135 40:     leal (%rdx,%rcx,8),%edx         /* fault in 8-byte loop: bytes left = edx + ecx * 8 */
 136         jmp 60f
 137 50:     movl %ecx,%edx                  /* fault in byte loop: bytes left = ecx */
 138 60:     jmp copy_user_handle_tail /* ecx is zerorest also */
 139         .previous
 140
 141         _ASM_EXTABLE(1b,30b)            /* labels 1-16 (64-byte loop accesses) are paired with fixup 30 */
 142         _ASM_EXTABLE(2b,30b)
 143         _ASM_EXTABLE(3b,30b)
 144         _ASM_EXTABLE(4b,30b)
 145         _ASM_EXTABLE(5b,30b)
 146         _ASM_EXTABLE(6b,30b)
 147         _ASM_EXTABLE(7b,30b)
 148         _ASM_EXTABLE(8b,30b)
 149         _ASM_EXTABLE(9b,30b)
 150         _ASM_EXTABLE(10b,30b)
 151         _ASM_EXTABLE(11b,30b)
 152         _ASM_EXTABLE(12b,30b)
 153         _ASM_EXTABLE(13b,30b)
 154         _ASM_EXTABLE(14b,30b)
 155         _ASM_EXTABLE(15b,30b)
 156         _ASM_EXTABLE(16b,30b)
 157         _ASM_EXTABLE(18b,40b)           /* 8-byte loop accesses are paired with fixup 40 */
 158         _ASM_EXTABLE(19b,40b)
 159         _ASM_EXTABLE(21b,50b)           /* byte loop accesses are paired with fixup 50 */
 160         _ASM_EXTABLE(22b,50b)
 161 ENDPROC(copy_user_generic_unrolled)
 162 EXPORT_SYMBOL(copy_user_generic_unrolled)
 163
 164 /* Some CPUs run faster using the string copy instructions.
 165  * This is also a lot simpler. Use them when possible.
 166  *
 167  * Only 4GB of copy is supported. This shouldn't be a problem
 168  * because the kernel normally only writes from/to page sized chunks
 169  * even if user space passed a longer buffer.
 170  * And more would be dangerous because both Intel and AMD have
 171  * errata with rep movsq > 4GB. If someone feels the need to fix
 172  * this please consider this.
 173  *
 174  * Input:
 175  * rdi destination
 176  * rsi source
 177  * rdx count (the code below only reads the low 32 bits, edx)
 178  *
 179  * Output:
 180  * eax uncopied bytes or 0 if successful.
 181  */
 182 ENTRY(copy_user_generic_string)
 183         ASM_STAC                        /* macro from outside this file; presumably opens the user-access window (SMAP) - confirm */
 184         cmpl $8,%edx
 185         jb 2f           /* less than 8 bytes, go to byte copy loop */
 186         ALIGN_DESTINATION               /* macro defined outside this file; by its name it aligns the destination - confirm */
 187         movl %edx,%ecx
 188         shrl $3,%ecx                    /* ecx = number of 8-byte words */
 189         andl $7,%edx                    /* edx = trailing bytes (count % 8) */
 190 1:      rep
 191         movsq                           /* copy ecx 8-byte words from (%rsi) to (%rdi) */
 192 2:      movl %edx,%ecx                  /* ecx = trailing byte count (the whole count on the short path) */
 193 3:      rep
 194         movsb                           /* copy ecx bytes */
 195         xorl %eax,%eax                  /* success: 0 uncopied bytes */
 196         ASM_CLAC                        /* counterpart of ASM_STAC above */
 197         ret
 198
 199         .section .fixup,"ax"
 200 11:     leal (%rdx,%rcx,8),%ecx         /* fault in rep movsq: bytes left = edx + ecx * 8, then fall into 12 */
 201 12:     movl %ecx,%edx          /* ecx is zerorest also */
 202         jmp copy_user_handle_tail
 203         .previous
 204
 205         _ASM_EXTABLE(1b,11b)            /* rep movsq is paired with fixup 11 */
 206         _ASM_EXTABLE(3b,12b)            /* rep movsb is paired with fixup 12 */
 207 ENDPROC(copy_user_generic_string)
 208 EXPORT_SYMBOL(copy_user_generic_string)
 209
 210 /*
 211  * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 212  * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 213  *
 214  * Input:
 215  * rdi destination
 216  * rsi source
 217  * rdx count (the code below only reads the low 32 bits, edx)
 218  *
 219  * Output:
 220  * eax uncopied bytes or 0 if successful.
 221  */
 222 ENTRY(copy_user_enhanced_fast_string)
 223         ASM_STAC                        /* macro from outside this file; presumably opens the user-access window (SMAP) - confirm */
 224         movl %edx,%ecx                  /* ecx = byte count for the rep below */
 225 1:      rep
 226         movsb                           /* the whole copy is a single rep movsb */
 227         xorl %eax,%eax                  /* success: 0 uncopied bytes */
 228         ASM_CLAC                        /* counterpart of ASM_STAC above */
 229         ret
 230
 231         .section .fixup,"ax"
 232 12:     movl %ecx,%edx          /* edx = bytes left; ecx is zerorest also */
 233         jmp copy_user_handle_tail
 234         .previous
 235
 236         _ASM_EXTABLE(1b,12b)            /* rep movsb is paired with fixup 12 */
 237 ENDPROC(copy_user_enhanced_fast_string)
 238 EXPORT_SYMBOL(copy_user_enhanced_fast_string)
 239
 240 /*
 241  * __copy_user_nocache - Uncached memory copy with exception handling
 242  * This will force destination out of cache for more performance.
 243  *
 244  * Note: Cached memory copy is used when destination or size is not
 245  * naturally aligned. That is:
 246  *  - Require 8-byte alignment when size is 8 bytes or larger.
 247  *  - Require 4-byte alignment when size is 4 bytes.
 248  */
 249 ENTRY(__copy_user_nocache)              /* in: rdi = destination, rsi = source, edx = count; eax = 0 on success */
 250         ASM_STAC                        /* macro from outside this file; presumably opens the user-access window (SMAP) - confirm */
 251
 252         /* If size is less than 8 bytes, go to 4-byte copy */
 253         cmpl $8,%edx
 254         jb .L_4b_nocache_copy_entry
 255
 256         /* If destination is not 8-byte aligned, "cache" copy to align it */
 257         ALIGN_DESTINATION               /* macro defined outside this file */
 258
 259         /* Set 4x8-byte copy count and remainder: ecx = count / 64, edx = count % 64 */
 260         movl %edx,%ecx
 261         andl $63,%edx
 262         shrl $6,%ecx
 263         jz .L_8b_nocache_copy_entry     /* jump if count is 0 */
 264
 265         /* Perform 4x8-byte nocache loop-copy (64 bytes per iteration, as two groups of four 8-byte moves) */
 266 .L_4x8b_nocache_copy_loop:
 267 1:      movq (%rsi),%r8                 /* load bytes 0-31 into r8-r11 ... */
 268 2:      movq 1*8(%rsi),%r9
 269 3:      movq 2*8(%rsi),%r10
 270 4:      movq 3*8(%rsi),%r11
 271 5:      movnti %r8,(%rdi)               /* ... and store them with movnti */
 272 6:      movnti %r9,1*8(%rdi)
 273 7:      movnti %r10,2*8(%rdi)
 274 8:      movnti %r11,3*8(%rdi)
 275 9:      movq 4*8(%rsi),%r8              /* load bytes 32-63 ... */
 276 10:     movq 5*8(%rsi),%r9
 277 11:     movq 6*8(%rsi),%r10
 278 12:     movq 7*8(%rsi),%r11
 279 13:     movnti %r8,4*8(%rdi)            /* ... and store them with movnti */
 280 14:     movnti %r9,5*8(%rdi)
 281 15:     movnti %r10,6*8(%rdi)
 282 16:     movnti %r11,7*8(%rdi)
 283         leaq 64(%rsi),%rsi              /* rsi, rdi and ecx change only after a whole block, */
 284         leaq 64(%rdi),%rdi              /* so the 4x8b fixup counts a faulting block as fully uncopied */
 285         decl %ecx
 286         jnz .L_4x8b_nocache_copy_loop
 287
 288         /* Set 8-byte copy count and remainder */
 289 .L_8b_nocache_copy_entry:
 290         movl %edx,%ecx
 291         andl $7,%edx                    /* edx = trailing bytes (remainder % 8) */
 292         shrl $3,%ecx                    /* ecx = number of remaining 8-byte words */
 293         jz .L_4b_nocache_copy_entry     /* jump if count is 0 */
 294
 295         /* Perform 8-byte nocache loop-copy */
 296 .L_8b_nocache_copy_loop:
 297 20:     movq (%rsi),%r8
 298 21:     movnti %r8,(%rdi)
 299         leaq 8(%rsi),%rsi
 300         leaq 8(%rdi),%rdi
 301         decl %ecx
 302         jnz .L_8b_nocache_copy_loop
 303
 304         /* If no byte left, we're done */
 305 .L_4b_nocache_copy_entry:
 306         andl %edx,%edx                  /* sets ZF when no bytes remain */
 307         jz .L_finish_copy
 308
 309         /* If destination is not 4-byte aligned, go to byte copy: */
 310         movl %edi,%ecx
 311         andl $3,%ecx                    /* low two bits of the destination address */
 312         jnz .L_1b_cache_copy_entry
 313
 314         /* Set 4-byte copy count (1 or 0) and remainder */
 315         movl %edx,%ecx
 316         andl $3,%edx                    /* edx = bytes left after the optional 4-byte move */
 317         shrl $2,%ecx                    /* ecx = 1 when at least 4 bytes remain, else 0 */
 318         jz .L_1b_cache_copy_entry       /* jump if count is 0 */
 319
 320         /* Perform 4-byte nocache copy: */
 321 30:     movl (%rsi),%r8d
 322 31:     movnti %r8d,(%rdi)
 323         leaq 4(%rsi),%rsi
 324         leaq 4(%rdi),%rdi
 325
 326         /* If no bytes left, we're done: */
 327         andl %edx,%edx
 328         jz .L_finish_copy
 329
 330         /* Perform byte "cache" loop-copy for the remainder */
 331 .L_1b_cache_copy_entry:
 332         movl %edx,%ecx                  /* edx is nonzero on every path that reaches here, so the loop runs at least once */
 333 .L_1b_cache_copy_loop:
 334 40:     movb (%rsi),%al
 335 41:     movb %al,(%rdi)                 /* plain movb store, not movnti */
 336         incq %rsi
 337         incq %rdi
 338         decl %ecx
 339         jnz .L_1b_cache_copy_loop
 340
 341         /* Finished copying; fence the prior stores */
 342 .L_finish_copy:
 343         xorl %eax,%eax                  /* success: 0 uncopied bytes */
 344         ASM_CLAC                        /* counterpart of ASM_STAC above */
 345         sfence
 346         ret
 347
 348         .section .fixup,"ax"
 349 .L_fixup_4x8b_copy:                     /* fault in 64-byte loop: bytes left = ecx * 64 + edx */
 350         shll $6,%ecx
 351         addl %ecx,%edx
 352         jmp .L_fixup_handle_tail
 353 .L_fixup_8b_copy:                       /* fault in 8-byte loop: bytes left = edx + ecx * 8 */
 354         lea (%rdx,%rcx,8),%rdx
 355         jmp .L_fixup_handle_tail
 356 .L_fixup_4b_copy:                       /* fault in 4-byte move: bytes left = edx + ecx * 4 */
 357         lea (%rdx,%rcx,4),%rdx
 358         jmp .L_fixup_handle_tail
 359 .L_fixup_1b_copy:                       /* fault in byte loop: bytes left = ecx */
 360         movl %ecx,%edx
 361 .L_fixup_handle_tail:
 362         sfence                          /* fence earlier stores before handing over, as on the success path */
 363         jmp copy_user_handle_tail       /* defined outside this file; entered with edx = bytes left */
 364         .previous
 365
 366         _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)     /* labels 1-16 (64-byte loop accesses) */
 367         _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
 368         _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
 369         _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
 370         _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
 371         _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
 372         _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
 373         _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
 374         _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
 375         _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
 376         _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
 377         _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
 378         _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
 379         _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
 380         _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
 381         _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
 382         _ASM_EXTABLE(20b,.L_fixup_8b_copy)      /* 8-byte loop accesses */
 383         _ASM_EXTABLE(21b,.L_fixup_8b_copy)
 384         _ASM_EXTABLE(30b,.L_fixup_4b_copy)      /* 4-byte move accesses */
 385         _ASM_EXTABLE(31b,.L_fixup_4b_copy)
 386         _ASM_EXTABLE(40b,.L_fixup_1b_copy)      /* byte loop accesses */
 387         _ASM_EXTABLE(41b,.L_fixup_1b_copy)
 388 ENDPROC(__copy_user_nocache)
 389 EXPORT_SYMBOL(__copy_user_nocache)