[S390] Inline assembly cleanup.

Major cleanup of all s390 inline assemblies. They now have a common
coding style. Quite a few have been shortened, mainly by using register
asm variables. Use of the EX_TABLE macro helps as well. The atomic ops,
bit ops and locking inlines now use the Q-constraint if a newer gcc
is used, which results in slightly better code.
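
As a note for readers, a minimal sketch of the two techniques mentioned
above, kept outside the patch itself. The helper names, the register
choice and the gcc version test are illustrative assumptions, not code
taken from this patch:

	/* Illustration only, not part of the patch. A register asm
	 * variable ties a value to a fixed register, so the operand
	 * constraint resolves to that register and extra copies go away. */
	static inline unsigned int sketch_add32(unsigned int a, unsigned int b)
	{
		register unsigned int result asm("2") = a;	/* keep value in gpr 2 */

		asm volatile(
			"	alr	%0,%1"		/* result += b, sets cc */
			: "+d" (result) : "d" (b) : "cc");
		return result;
	}

	/* The "Q" constraint lets gcc supply a base+displacement memory
	 * operand directly; older compilers get the address in an address
	 * register instead. The version cut-off below is an assumption. */
	static inline void sketch_set_bit0(unsigned char *byte)
	{
	#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
		asm volatile(
			"	oi	%0,1"		/* *byte |= 1 */
			: "+Q" (*byte) : : "cc");
	#else
		asm volatile(
			"	oi	0(%1),1"	/* *byte |= 1 */
			: "+m" (*byte) : "a" (byte) : "cc");
	#endif
	}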

Thanks to Christian Borntraeger for proofreading the changes.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/math-emu/math.c b/arch/s390/math-emu/math.c
index b4957c8..6b9aec5 100644
--- a/arch/s390/math-emu/math.c
+++ b/arch/s390/math-emu/math.c
@@ -1564,52 +1564,52 @@
 }
 
 static inline void emu_load_regd(int reg) {
-        if ((reg&9) != 0)         /* test if reg in {0,2,4,6} */
+	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
                 return;
-        asm volatile (            /* load reg from fp_regs.fprs[reg] */
-                "     bras  1,0f\n"
-                "     ld    0,0(%1)\n"
-                "0:   ex    %0,0(1)"
-                : /* no output */
-                : "a" (reg<<4),"a" (&current->thread.fp_regs.fprs[reg].d)
-                : "1" );
+	asm volatile(		/* load reg from fp_regs.fprs[reg] */
+		"	bras	1,0f\n"
+		"	ld	0,0(%1)\n"
+		"0:	ex	%0,0(1)"
+		: /* no output */
+		: "a" (reg<<4),"a" (&current->thread.fp_regs.fprs[reg].d)
+		: "1");
 }
 
 static inline void emu_load_rege(int reg) {
-        if ((reg&9) != 0)         /* test if reg in {0,2,4,6} */
+	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
                 return;
-        asm volatile (            /* load reg from fp_regs.fprs[reg] */
-                "     bras  1,0f\n"
-                "     le    0,0(%1)\n"
-                "0:   ex    %0,0(1)"
-                : /* no output */
-                : "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].f)
-                : "1" );
+	asm volatile(		/* load reg from fp_regs.fprs[reg] */
+		"	bras	1,0f\n"
+		"	le	0,0(%1)\n"
+		"0:	ex	%0,0(1)"
+		: /* no output */
+		: "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].f)
+		: "1");
 }
 
 static inline void emu_store_regd(int reg) {
-        if ((reg&9) != 0)         /* test if reg in {0,2,4,6} */
+	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
                 return;
-        asm volatile (            /* store reg to fp_regs.fprs[reg] */
-                "     bras  1,0f\n"
-                "     std   0,0(%1)\n"
-                "0:   ex    %0,0(1)"
-                : /* no output */
-                : "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].d)
-                : "1" );
+	asm volatile(		/* store reg to fp_regs.fprs[reg] */
+		"	bras	1,0f\n"
+		"	std	0,0(%1)\n"
+		"0:	ex	%0,0(1)"
+		: /* no output */
+		: "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].d)
+		: "1");
 }
 
 
 static inline void emu_store_rege(int reg) {
-        if ((reg&9) != 0)         /* test if reg in {0,2,4,6} */
+	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
                 return;
-        asm volatile (            /* store reg to fp_regs.fprs[reg] */
-                "     bras  1,0f\n"
-                "     ste   0,0(%1)\n"
-                "0:   ex    %0,0(1)"
-                : /* no output */
-                : "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].f)
-                : "1" );
+	asm volatile(		/* store reg to fp_regs.fprs[reg] */
+		"	bras	1,0f\n"
+		"	ste	0,0(%1)\n"
+		"0:	ex	%0,0(1)"
+		: /* no output */
+		: "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].f)
+		: "1");
 }
 
 int math_emu_b3(__u8 *opcode, struct pt_regs * regs) {
@@ -2089,23 +2089,22 @@
 
         if ((opc & 0x90) == 0) {           /* test if rx in {0,2,4,6} */
                 /* we got an exception therfore ry can't be in {0,2,4,6} */
-                __asm__ __volatile (       /* load rx from fp_regs.fprs[ry] */
-                        "     bras  1,0f\n"
-                        "     ld    0,0(%1)\n"
-                        "0:   ex    %0,0(1)"
-                        : /* no output */
-                        : "a" (opc & 0xf0),
-                          "a" (&fp_regs->fprs[opc & 0xf].d)
-                        : "1" );
+		asm volatile(		/* load rx from fp_regs.fprs[ry] */
+			"	bras	1,0f\n"
+			"	ld	0,0(%1)\n"
+			"0:	ex	%0,0(1)"
+			: /* no output */
+			: "a" (opc & 0xf0), "a" (&fp_regs->fprs[opc & 0xf].d)
+			: "1");
         } else if ((opc & 0x9) == 0) {     /* test if ry in {0,2,4,6} */
-                __asm__ __volatile (       /* store ry to fp_regs.fprs[rx] */
-                        "     bras  1,0f\n"
-                        "     std   0,0(%1)\n"
-                        "0:   ex    %0,0(1)"
-                        : /* no output */
-                        : "a" ((opc & 0xf) << 4),
-                          "a" (&fp_regs->fprs[(opc & 0xf0)>>4].d)
-                        : "1" );
+		asm volatile(		/* store ry to fp_regs.fprs[rx] */
+			"	bras	1,0f\n"
+			"	std	0,0(%1)\n"
+			"0:	ex	%0,0(1)"
+			: /* no output */
+			: "a" ((opc & 0xf) << 4),
+			  "a" (&fp_regs->fprs[(opc & 0xf0)>>4].d)
+			: "1");
         } else  /* move fp_regs.fprs[ry] to fp_regs.fprs[rx] */
                 fp_regs->fprs[(opc & 0xf0) >> 4] = fp_regs->fprs[opc & 0xf];
 	return 0;
@@ -2120,23 +2119,22 @@
 
         if ((opc & 0x90) == 0) {           /* test if rx in {0,2,4,6} */
                 /* we got an exception therfore ry can't be in {0,2,4,6} */
-                __asm__ __volatile (       /* load rx from fp_regs.fprs[ry] */
-                        "     bras  1,0f\n"
-                        "     le    0,0(%1)\n"
-                        "0:   ex    %0,0(1)"
-                        : /* no output */
-                        : "a" (opc & 0xf0),
-                          "a" (&fp_regs->fprs[opc & 0xf].f)
-                        : "1" );
+		asm volatile(		/* load rx from fp_regs.fprs[ry] */
+			"	bras	1,0f\n"
+			"	le	0,0(%1)\n"
+			"0:	ex	%0,0(1)"
+			: /* no output */
+			: "a" (opc & 0xf0), "a" (&fp_regs->fprs[opc & 0xf].f)
+			: "1");
         } else if ((opc & 0x9) == 0) {     /* test if ry in {0,2,4,6} */
-                __asm__ __volatile (       /* store ry to fp_regs.fprs[rx] */
-                        "     bras  1,0f\n"
-                        "     ste   0,0(%1)\n"
-                        "0:   ex    %0,0(1)"
-                        : /* no output */
-                        : "a" ((opc & 0xf) << 4),
-                          "a" (&fp_regs->fprs[(opc & 0xf0) >> 4].f)
-                        : "1" );
+		asm volatile(		/* store ry to fp_regs.fprs[rx] */
+			"	bras	1,0f\n"
+			"	ste	0,0(%1)\n"
+			"0:	ex	%0,0(1)"
+			: /* no output */
+			: "a" ((opc & 0xf) << 4),
+			  "a" (&fp_regs->fprs[(opc & 0xf0) >> 4].f)
+			: "1");
         } else  /* move fp_regs.fprs[ry] to fp_regs.fprs[rx] */
                 fp_regs->fprs[(opc & 0xf0) >> 4] = fp_regs->fprs[opc & 0xf];
 	return 0;
diff --git a/arch/s390/math-emu/sfp-util.h b/arch/s390/math-emu/sfp-util.h
index ab556b6..5b6ca45 100644
--- a/arch/s390/math-emu/sfp-util.h
+++ b/arch/s390/math-emu/sfp-util.h
@@ -4,48 +4,51 @@
 #include <asm/byteorder.h>
 
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) ({		\
-        unsigned int __sh = (ah);			\
-        unsigned int __sl = (al);			\
-        __asm__ ("   alr  %1,%3\n"			\
-                 "   brc  12,0f\n"			\
-                 "   ahi  %0,1\n"			\
-                 "0: alr  %0,%2"			\
-                 : "+&d" (__sh), "+d" (__sl)		\
-                 : "d" (bh), "d" (bl) : "cc" );		\
-        (sh) = __sh;					\
-        (sl) = __sl;					\
+	unsigned int __sh = (ah);			\
+	unsigned int __sl = (al);			\
+	asm volatile(					\
+		"	alr	%1,%3\n"		\
+		"	brc	12,0f\n"		\
+		"	ahi	%0,1\n"			\
+		"0:	alr  %0,%2"			\
+		: "+&d" (__sh), "+d" (__sl)		\
+		: "d" (bh), "d" (bl) : "cc");		\
+	(sh) = __sh;					\
+	(sl) = __sl;					\
 })
 
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) ({		\
-       unsigned int __sh = (ah);			\
-       unsigned int __sl = (al);			\
-       __asm__ ("   slr  %1,%3\n"			\
-                "   brc  3,0f\n"			\
-                "   ahi  %0,-1\n"			\
-                "0: slr  %0,%2"				\
-                : "+&d" (__sh), "+d" (__sl)		\
-                : "d" (bh), "d" (bl) : "cc" );		\
-       (sh) = __sh;					\
-       (sl) = __sl;					\
+	unsigned int __sh = (ah);			\
+	unsigned int __sl = (al);			\
+	asm volatile(					\
+		"	slr	%1,%3\n"		\
+		"	brc	3,0f\n"			\
+		"	ahi	%0,-1\n"		\
+		"0:	slr	%0,%2"			\
+		: "+&d" (__sh), "+d" (__sl)		\
+		: "d" (bh), "d" (bl) : "cc");		\
+	(sh) = __sh;					\
+	(sl) = __sl;					\
 })
 
 /* a umul b = a mul b + (a>=2<<31) ? b<<32:0 + (b>=2<<31) ? a<<32:0 */
 #define umul_ppmm(wh, wl, u, v) ({			\
-        unsigned int __wh = u;				\
-        unsigned int __wl = v;				\
-        __asm__ ("   ltr  1,%0\n"			\
-                 "   mr   0,%1\n"			\
-                 "   jnm  0f\n"				\
-                 "   alr  0,%1\n"			\
-                 "0: ltr  %1,%1\n"			\
-                 "   jnm  1f\n"				\
-                 "   alr  0,%0\n"			\
-                 "1: lr   %0,0\n"			\
-                 "   lr   %1,1\n"			\
-                 : "+d" (__wh), "+d" (__wl)		\
-                 : : "0", "1", "cc" );			\
-        wh = __wh;					\
-        wl = __wl;					\
+	unsigned int __wh = u;				\
+	unsigned int __wl = v;				\
+	asm volatile(					\
+		"	ltr	1,%0\n"			\
+		"	mr	0,%1\n"			\
+		"	jnm	0f\n"				\
+		"	alr	0,%1\n"			\
+		"0:	ltr	%1,%1\n"			\
+		"	jnm	1f\n"				\
+		"	alr	0,%0\n"			\
+		"1:	lr	%0,0\n"			\
+		"	lr	%1,1\n"			\
+		: "+d" (__wh), "+d" (__wl)		\
+		: : "0", "1", "cc");			\
+	wh = __wh;					\
+	wl = __wl;					\
 })
 
 #define udiv_qrnnd(q, r, n1, n0, d)			\