arm64: cmpxchg_dbl: patch in lse instructions when supported by the CPU
author		Will Deacon <will.deacon@arm.com>	Thu, 14 May 2015 17:05:50 +0000 (18:05 +0100)
committer	Will Deacon <will.deacon@arm.com>	Mon, 27 Jul 2015 14:28:52 +0000 (15:28 +0100)

On CPUs which support the LSE atomic instructions introduced in ARMv8.1,
it makes sense to use them in preference to ll/sc sequences.

This patch introduces runtime patching of our cmpxchg_double primitives
so that the LSE casp instruction is used instead.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
arch/arm64/include/asm/atomic_ll_sc.h
arch/arm64/include/asm/atomic_lse.h
arch/arm64/include/asm/cmpxchg.h
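
The runtime patching itself is done by the ARM64_LSE_ATOMIC_INSN() helper from <asm/lse.h>, introduced earlier in this series rather than by this patch. Roughly sketched, and assuming the ALTERNATIVE()/ARM64_HAS_LSE_ATOMICS machinery from that series, it looks like this:

/*
 * Sketch only (not part of this diff): emit the LL/SC sequence by default
 * and let the boot-time alternatives framework rewrite it to the LSE
 * sequence on CPUs that advertise ARM64_HAS_LSE_ATOMICS. Both sequences
 * must occupy the same number of instructions, which is why the LL/SC
 * side in atomic_lse.h pads its single "bl" with three nops.
 */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse)                                \
        ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS)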

diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index 4864158..f89f1e4 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -253,4 +253,38 @@ __CMPXCHG_CASE( ,  , mb_8, dmb ish, "memory")
 
 #undef __CMPXCHG_CASE
 
+#define __CMPXCHG_DBL(name, mb, cl)                                    \
+__LL_SC_INLINE int                                                     \
+__LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1,              \
+                                     unsigned long old2,               \
+                                     unsigned long new1,               \
+                                     unsigned long new2,               \
+                                     volatile void *ptr))              \
+{                                                                      \
+       unsigned long tmp, ret;                                         \
+                                                                       \
+       asm volatile("// __cmpxchg_double" #name "\n"                   \
+       "       " #mb "\n"                                              \
+       "1:     ldxp    %0, %1, %2\n"                                   \
+       "       eor     %0, %0, %3\n"                                   \
+       "       eor     %1, %1, %4\n"                                   \
+       "       orr     %1, %0, %1\n"                                   \
+       "       cbnz    %1, 2f\n"                                       \
+       "       stxp    %w0, %5, %6, %2\n"                              \
+       "       cbnz    %w0, 1b\n"                                      \
+       "       " #mb "\n"                                              \
+       "2:"                                                            \
+       : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr)        \
+       : "r" (old1), "r" (old2), "r" (new1), "r" (new2)                \
+       : cl);                                                          \
+                                                                       \
+       return ret;                                                     \
+}                                                                      \
+__LL_SC_EXPORT(__cmpxchg_double##name);
+
+__CMPXCHG_DBL(   ,        ,         )
+__CMPXCHG_DBL(_mb, dmb ish, "memory")
+
+#undef __CMPXCHG_DBL
+
 #endif /* __ASM_ATOMIC_LL_SC_H */
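
The LL/SC sequence above is a double-word compare-and-swap: load an adjacent pair of 64-bit words with LDXP, bail out if either word differs from its expected value, otherwise try to store the new pair with STXP and retry if the exclusive is lost. As a hypothetical illustration of the same semantics (not how the kernel implements it), the operation could be written in user space with the compiler's 16-byte atomic builtin:

#include <stdbool.h>
#include <string.h>

/* Illustration only: a 16-byte CAS over an aligned pair of longs. */
struct pair {
        unsigned long first;
        unsigned long second;
} __attribute__((aligned(16)));

static bool cmpxchg_double_sketch(struct pair *p,
                                  unsigned long old1, unsigned long old2,
                                  unsigned long new1, unsigned long new2)
{
        struct pair expected = { old1, old2 };
        struct pair desired  = { new1, new2 };
        unsigned __int128 exp, des;

        memcpy(&exp, &expected, sizeof(exp));
        memcpy(&des, &desired, sizeof(des));

        /* Fails, leaving *p untouched, if either word has changed. */
        return __atomic_compare_exchange_n((unsigned __int128 *)p, &exp, des,
                                           false, __ATOMIC_SEQ_CST,
                                           __ATOMIC_RELAXED);
}
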
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index b39ae4c..f3cb105 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -388,4 +388,47 @@ __CMPXCHG_CASE(x,  , mb_8, al, "memory")
 #undef __LL_SC_CMPXCHG
 #undef __CMPXCHG_CASE
 
+#define __LL_SC_CMPXCHG_DBL(op)        __LL_SC_CALL(__cmpxchg_double##op)
+
+#define __CMPXCHG_DBL(name, mb, cl...)                                 \
+static inline int __cmpxchg_double##name(unsigned long old1,           \
+                                        unsigned long old2,            \
+                                        unsigned long new1,            \
+                                        unsigned long new2,            \
+                                        volatile void *ptr)            \
+{                                                                      \
+       unsigned long oldval1 = old1;                                   \
+       unsigned long oldval2 = old2;                                   \
+       register unsigned long x0 asm ("x0") = old1;                    \
+       register unsigned long x1 asm ("x1") = old2;                    \
+       register unsigned long x2 asm ("x2") = new1;                    \
+       register unsigned long x3 asm ("x3") = new2;                    \
+       register unsigned long x4 asm ("x4") = (unsigned long)ptr;      \
+                                                                       \
+       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
+       /* LL/SC */                                                     \
+       "       nop\n"                                                  \
+       "       nop\n"                                                  \
+       "       nop\n"                                                  \
+       __LL_SC_CMPXCHG_DBL(name),                                      \
+       /* LSE atomics */                                               \
+       "       casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
+       "       eor     %[old1], %[old1], %[oldval1]\n"                 \
+       "       eor     %[old2], %[old2], %[oldval2]\n"                 \
+       "       orr     %[old1], %[old1], %[old2]")                     \
+       : [old1] "+r" (x0), [old2] "+r" (x1),                           \
+         [v] "+Q" (*(unsigned long *)ptr)                              \
+       : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4),             \
+         [oldval1] "r" (oldval1), [oldval2] "r" (oldval2)              \
+       : "x30" , ##cl);                                                \
+                                                                       \
+       return x0;                                                      \
+}
+
+__CMPXCHG_DBL(   ,   )
+__CMPXCHG_DBL(_mb, al, "memory")
+
+#undef __LL_SC_CMPXCHG_DBL
+#undef __CMPXCHG_DBL
+
 #endif /* __ASM_ATOMIC_LSE_H */
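
A note on the LSE version: the operands are pinned to x0-x4 so that the out-of-line LL/SC fallback can be reached with a plain branch-and-link (hence the "x30" clobber), and CASP writes the values it actually observed back into x0/x1. The trailing eor/eor/orr sequence then leaves zero in x0 exactly when both observed words matched the expected ones, i.e. it computes:

/* Zero iff both observed words equal the expected ones (the value left in x0). */
static inline unsigned long pair_mismatch(unsigned long seen1, unsigned long seen2,
                                          unsigned long want1, unsigned long want2)
{
        return (seen1 ^ want1) | (seen2 ^ want2);
}

The cmpxchg.h wrappers below invert this, so cmpxchg_double() returns 1 on success and 0 on failure.
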
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 60a5581..f702126 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -128,51 +128,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
        unreachable();
 }
 
-#define system_has_cmpxchg_double()     1
-
-static inline int __cmpxchg_double(volatile void *ptr1, volatile void *ptr2,
-               unsigned long old1, unsigned long old2,
-               unsigned long new1, unsigned long new2, int size)
-{
-       unsigned long loop, lost;
-
-       switch (size) {
-       case 8:
-               VM_BUG_ON((unsigned long *)ptr2 - (unsigned long *)ptr1 != 1);
-               do {
-                       asm volatile("// __cmpxchg_double8\n"
-                       "       ldxp    %0, %1, %2\n"
-                       "       eor     %0, %0, %3\n"
-                       "       eor     %1, %1, %4\n"
-                       "       orr     %1, %0, %1\n"
-                       "       mov     %w0, #0\n"
-                       "       cbnz    %1, 1f\n"
-                       "       stxp    %w0, %5, %6, %2\n"
-                       "1:\n"
-                               : "=&r"(loop), "=&r"(lost), "+Q" (*(u64 *)ptr1)
-                               : "r" (old1), "r"(old2), "r"(new1), "r"(new2));
-               } while (loop);
-               break;
-       default:
-               BUILD_BUG();
-       }
-
-       return !lost;
-}
-
-static inline int __cmpxchg_double_mb(volatile void *ptr1, volatile void *ptr2,
-                       unsigned long old1, unsigned long old2,
-                       unsigned long new1, unsigned long new2, int size)
-{
-       int ret;
-
-       smp_mb();
-       ret = __cmpxchg_double(ptr1, ptr2, old1, old2, new1, new2, size);
-       smp_mb();
-
-       return ret;
-}
-
 static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
                                         unsigned long new, int size)
 {
@@ -210,21 +165,32 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
        __ret; \
 })
 
+#define system_has_cmpxchg_double()     1
+
+#define __cmpxchg_double_check(ptr1, ptr2)                                     \
+({                                                                             \
+       if (sizeof(*(ptr1)) != 8)                                               \
+               BUILD_BUG();                                                    \
+       VM_BUG_ON((unsigned long *)(ptr2) - (unsigned long *)(ptr1) != 1);      \
+})
+
 #define cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \
 ({\
        int __ret;\
-       __ret = __cmpxchg_double_mb((ptr1), (ptr2), (unsigned long)(o1), \
-                       (unsigned long)(o2), (unsigned long)(n1), \
-                       (unsigned long)(n2), sizeof(*(ptr1)));\
+       __cmpxchg_double_check(ptr1, ptr2); \
+       __ret = !__cmpxchg_double_mb((unsigned long)(o1), (unsigned long)(o2), \
+                                    (unsigned long)(n1), (unsigned long)(n2), \
+                                    ptr1); \
        __ret; \
 })
 
 #define cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \
 ({\
        int __ret;\
-       __ret = __cmpxchg_double((ptr1), (ptr2), (unsigned long)(o1), \
-                       (unsigned long)(o2), (unsigned long)(n1), \
-                       (unsigned long)(n2), sizeof(*(ptr1)));\
+       __cmpxchg_double_check(ptr1, ptr2); \
+       __ret = !__cmpxchg_double((unsigned long)(o1), (unsigned long)(o2), \
+                                 (unsigned long)(n1), (unsigned long)(n2), \
+                                 ptr1); \
        __ret; \
 })
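
For reference, a caller of the reworked macros is unchanged: the two words must be adjacent 64-bit quantities (enforced by __cmpxchg_double_check()) and the pair must be 16-byte aligned for the exclusive/CASP accesses. A hypothetical user, with illustrative names that do not come from this patch, might look like:

struct counted_ptr {
        void *obj;
        unsigned long seq;
} __attribute__((aligned(2 * sizeof(void *))));

static int replace_counted_ptr(struct counted_ptr *cp,
                               void *old_obj, unsigned long old_seq,
                               void *new_obj)
{
        if (!system_has_cmpxchg_double())
                return 0;

        /* 1 if both words were swapped atomically, 0 if either had changed. */
        return cmpxchg_double(&cp->obj, &cp->seq,
                              old_obj, old_seq,
                              new_obj, old_seq + 1);
}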