[S390] Inline assembly cleanup.

Major cleanup of all s390 inline assemblies. They now have a common
coding style. Quite a few have been shortened, mainly by using register
asm variables. Use of the EX_TABLE macro helps as well. The atomic ops,
bit ops and locking inlines now use the Q-constraint if a newer gcc
is used. That results in slightly better code.

Thanks to Christian Borntraeger for proof reading the changes.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index efd836c..2b13708 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -105,63 +105,6 @@
 };
 
 /*
- * Standard fixup and ex_table sections for crypt_s390 inline functions.
- * label 0: the s390 crypto operation
- * label 1: just after 1 to catch illegal operation exception
- *          (unsupported model)
- * label 6: the return point after fixup
- * label 7: set error value if exception _in_ crypto operation
- * label 8: set error value if illegal operation exception
- * [ret] is the variable to receive the error code
- * [ERR] is the error code value
- */
-#ifndef CONFIG_64BIT
-#define __crypt_s390_fixup \
-	".section .fixup,\"ax\" \n"	\
-	"7:	lhi	%0,%h[e1] \n"	\
-	"	bras	1,9f \n"	\
-	"	.long	6b \n"		\
-	"8:	lhi	%0,%h[e2] \n"	\
-	"	bras	1,9f \n"	\
-	"	.long	6b \n"		\
-	"9:	l	1,0(1) \n"	\
-	"	br	1 \n"		\
-	".previous \n"			\
-	".section __ex_table,\"a\" \n"	\
-	"	.align	4 \n"		\
-	"	.long	0b,7b \n"	\
-	"	.long	1b,8b \n"	\
-	".previous"
-#else /* CONFIG_64BIT */
-#define __crypt_s390_fixup \
-	".section .fixup,\"ax\" \n"	\
-	"7:	lhi	%0,%h[e1] \n"	\
-	"	jg	6b \n"		\
-	"8:	lhi	%0,%h[e2] \n"	\
-	"	jg	6b \n"		\
-	".previous\n"			\
-	".section __ex_table,\"a\" \n"	\
-	"	.align	8 \n"		\
-	"	.quad	0b,7b \n"	\
-	"	.quad	1b,8b \n"	\
-	".previous"
-#endif /* CONFIG_64BIT */
-
-/*
- * Standard code for setting the result of s390 crypto instructions.
- * %0: the register which will receive the result
- * [result]: the register containing the result (e.g. second operand length
- * to compute number of processed bytes].
- */
-#ifndef CONFIG_64BIT
-#define __crypt_s390_set_result \
-	"	lr	%0,%[result] \n"
-#else /* CONFIG_64BIT */
-#define __crypt_s390_set_result \
-	"	lgr	%0,%[result] \n"
-#endif
-
-/*
  * Executes the KM (CIPHER MESSAGE) operation of the CPU.
  * @param func: the function code passed to KM; see crypt_s390_km_func
  * @param param: address of parameter block; see POP for details on each func
@@ -176,28 +119,24 @@
 {
 	register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
 	register void* __param asm("1") = param;
-	register u8* __dest asm("4") = dest;
 	register const u8* __src asm("2") = src;
 	register long __src_len asm("3") = src_len;
+	register u8* __dest asm("4") = dest;
 	int ret;
 
-	ret = 0;
-	__asm__ __volatile__ (
-		"0:	.insn	rre,0xB92E0000,%1,%2 \n" /* KM opcode */
+	asm volatile(
+		"0:	.insn	rre,0xb92e0000,%3,%1 \n" /* KM opcode */
 		"1:	brc	1,0b \n" /* handle partial completion */
-		__crypt_s390_set_result
-		"6:	\n"
-		__crypt_s390_fixup
-		: "+d" (ret), "+a" (__dest), "+a" (__src),
-		  [result] "+d" (__src_len)
-		: [e1] "K" (-EFAULT), [e2] "K" (-ENOSYS), "d" (__func),
-		  "a" (__param)
-		: "cc", "memory"
-	);
-	if (ret >= 0 && func & CRYPT_S390_FUNC_MASK){
-		ret = src_len - ret;
-	}
-	return ret;
+		"	ahi	%0,%h7\n"
+		"2:	ahi	%0,%h8\n"
+		"3:\n"
+		EX_TABLE(0b,3b) EX_TABLE(1b,2b)
+		: "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest)
+		: "d" (__func), "a" (__param), "0" (-EFAULT),
+		  "K" (ENOSYS), "K" (-ENOSYS + EFAULT) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
 }
 
 /*
@@ -215,28 +154,24 @@
 {
 	register long __func asm("0") = func & CRYPT_S390_FUNC_MASK;
 	register void* __param asm("1") = param;
-	register u8* __dest asm("4") = dest;
 	register const u8* __src asm("2") = src;
 	register long __src_len asm("3") = src_len;
+	register u8* __dest asm("4") = dest;
 	int ret;
 
-	ret = 0;
-	__asm__ __volatile__ (
-		"0:	.insn	rre,0xB92F0000,%1,%2 \n" /* KMC opcode */
+	asm volatile(
+		"0:	.insn	rre,0xb92f0000,%3,%1 \n" /* KMC opcode */
 		"1:	brc	1,0b \n" /* handle partial completion */
-		__crypt_s390_set_result
-		"6:	\n"
-		__crypt_s390_fixup
-		: "+d" (ret), "+a" (__dest), "+a" (__src),
-		  [result] "+d" (__src_len)
-		: [e1] "K" (-EFAULT), [e2] "K" (-ENOSYS), "d" (__func),
-		  "a" (__param)
-		: "cc", "memory"
-	);
-	if (ret >= 0 && func & CRYPT_S390_FUNC_MASK){
-		ret = src_len - ret;
-	}
-	return ret;
+		"	ahi	%0,%h7\n"
+		"2:	ahi	%0,%h8\n"
+		"3:\n"
+		EX_TABLE(0b,3b) EX_TABLE(1b,2b)
+		: "=d" (ret), "+a" (__src), "+d" (__src_len), "+a" (__dest)
+		: "d" (__func), "a" (__param), "0" (-EFAULT),
+		  "K" (ENOSYS), "K" (-ENOSYS + EFAULT) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
 }
 
 /*
@@ -258,22 +193,19 @@
 	register long __src_len asm("3") = src_len;
 	int ret;
 
-	ret = 0;
-	__asm__ __volatile__ (
-		"0:	.insn	rre,0xB93E0000,%1,%1 \n" /* KIMD opcode */
-		"1:	brc	1,0b \n" /* handle partical completion */
-		__crypt_s390_set_result
-		"6:	\n"
-		__crypt_s390_fixup
-		: "+d" (ret), "+a" (__src), [result] "+d" (__src_len)
-		: [e1] "K" (-EFAULT), [e2] "K" (-ENOSYS), "d" (__func),
-		  "a" (__param)
-		: "cc", "memory"
-	);
-	if (ret >= 0 && (func & CRYPT_S390_FUNC_MASK)){
-		ret = src_len - ret;
-	}
-	return ret;
+	asm volatile(
+		"0:	.insn	rre,0xb93e0000,%1,%1 \n" /* KIMD opcode */
+		"1:	brc	1,0b \n" /* handle partial completion */
+		"	ahi	%0,%h6\n"
+		"2:	ahi	%0,%h7\n"
+		"3:\n"
+		EX_TABLE(0b,3b) EX_TABLE(1b,2b)
+		: "=d" (ret), "+a" (__src), "+d" (__src_len)
+		: "d" (__func), "a" (__param), "0" (-EFAULT),
+		  "K" (ENOSYS), "K" (-ENOSYS + EFAULT) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
 }
 
 /*
@@ -294,22 +226,19 @@
 	register long __src_len asm("3") = src_len;
 	int ret;
 
-	ret = 0;
-	__asm__ __volatile__ (
-		"0:	.insn	rre,0xB93F0000,%1,%1 \n" /* KLMD opcode */
-		"1:	brc	1,0b \n" /* handle partical completion */
-		__crypt_s390_set_result
-		"6:	\n"
-		__crypt_s390_fixup
-		: "+d" (ret), "+a" (__src), [result] "+d" (__src_len)
-		: [e1] "K" (-EFAULT), [e2] "K" (-ENOSYS), "d" (__func),
-		  "a" (__param)
-		: "cc", "memory"
-	);
-	if (ret >= 0 && func & CRYPT_S390_FUNC_MASK){
-		ret = src_len - ret;
-	}
-	return ret;
+	asm volatile(
+		"0:	.insn	rre,0xb93f0000,%1,%1 \n" /* KLMD opcode */
+		"1:	brc	1,0b \n" /* handle partial completion */
+		"	ahi	%0,%h6\n"
+		"2:	ahi	%0,%h7\n"
+		"3:\n"
+		EX_TABLE(0b,3b) EX_TABLE(1b,2b)
+		: "=d" (ret), "+a" (__src), "+d" (__src_len)
+		: "d" (__func), "a" (__param), "0" (-EFAULT),
+		  "K" (ENOSYS), "K" (-ENOSYS + EFAULT) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
 }
 
 /*
@@ -331,22 +260,19 @@
 	register long __src_len asm("3") = src_len;
 	int ret;
 
-	ret = 0;
-	__asm__ __volatile__ (
-		"0:	.insn	rre,0xB91E0000,%5,%5 \n" /* KMAC opcode */
-		"1:	brc	1,0b \n" /* handle partical completion */
-		__crypt_s390_set_result
-		"6:	\n"
-		__crypt_s390_fixup
-		: "+d" (ret), "+a" (__src), [result] "+d" (__src_len)
-		: [e1] "K" (-EFAULT), [e2] "K" (-ENOSYS), "d" (__func),
-		  "a" (__param)
-		: "cc", "memory"
-	);
-	if (ret >= 0 && func & CRYPT_S390_FUNC_MASK){
-		ret = src_len - ret;
-	}
-	return ret;
+	asm volatile(
+		"0:	.insn	rre,0xb91e0000,%1,%1 \n" /* KMAC opcode */
+		"1:	brc	1,0b \n" /* handle partial completion */
+		"	ahi	%0,%h6\n"
+		"2:	ahi	%0,%h7\n"
+		"3:\n"
+		EX_TABLE(0b,3b) EX_TABLE(1b,2b)
+		: "=d" (ret), "+a" (__src), "+d" (__src_len)
+		: "d" (__func), "a" (__param), "0" (-EFAULT),
+		  "K" (ENOSYS), "K" (-ENOSYS + EFAULT) : "cc", "memory");
+	if (ret < 0)
+		return ret;
+	return (func & CRYPT_S390_FUNC_MASK) ? src_len - __src_len : __src_len;
 }
 
 /**
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 684384f..443fa37 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -333,22 +333,14 @@
 	register unsigned long _subcode asm("0") = subcode;
 	register unsigned long _size asm("1") = size;
 
-	asm volatile ("   diag    %2,%0,0x204\n"
-		      "0: \n" ".section __ex_table,\"a\"\n"
-#ifndef __s390x__
-		      "    .align 4\n"
-		      "    .long  0b,0b\n"
-#else
-		      "    .align 8\n"
-		      "    .quad  0b,0b\n"
-#endif
-		      ".previous":"+d" (_subcode), "+d"(_size)
-		      :"d"(addr)
-		      :"memory");
+	asm volatile(
+		"	diag	%2,%0,0x204\n"
+		"0:\n"
+		EX_TABLE(0b,0b)
+		: "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory");
 	if (_subcode)
 		return -1;
-	else
-		return _size;
+	return _size;
 }
 
 /*
@@ -491,8 +483,7 @@
 
 static void diag224(void *ptr)
 {
-	asm volatile("   diag    %0,%1,0x224\n"
-		     : :"d" (0), "d"(ptr) : "memory");
+	asm volatile("diag %0,%1,0x224" : :"d" (0), "d"(ptr) : "memory");
 }
 
 static int diag224_get_name_table(void)
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 91b2884..c46e3d4 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -544,10 +544,7 @@
 		current->ptrace &= ~PT_DTRACE;
 		task_unlock(current);
 		current->thread.fp_regs.fpc=0;
-		__asm__ __volatile__
-		        ("sr  0,0\n\t"
-		         "sfpc 0,0\n\t"
-			 : : :"0");
+		asm volatile("sfpc %0,0" : : "d" (0));
 	}
         putname(filename);
 out:
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index 4ef44e5..1eae74e 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -25,11 +25,8 @@
  */
 int  __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
 {
-	const int mask = 0x40000000L;
-	unsigned long flags;
-	int return_code;
-	int return_len;
-	int cmdlen;
+	unsigned long flags, cmdlen;
+	int return_code, return_len;
 
 	spin_lock_irqsave(&cpcmd_lock, flags);
 	cmdlen = strlen(cmd);
@@ -38,64 +35,44 @@
 	ASCEBC(cpcmd_buf, cmdlen);
 
 	if (response != NULL && rlen > 0) {
+		register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf;
+		register unsigned long reg3 asm ("3") = (addr_t) response;
+		register unsigned long reg4 asm ("4") = cmdlen | 0x40000000L;
+		register unsigned long reg5 asm ("5") = rlen;
+
 		memset(response, 0, rlen);
+		asm volatile(
 #ifndef CONFIG_64BIT
-		asm volatile (	"lra	2,0(%2)\n"
-				"lr	4,%3\n"
-				"o	4,%6\n"
-				"lra	3,0(%4)\n"
-				"lr	5,%5\n"
-				"diag	2,4,0x8\n"
-				"brc	8, 1f\n"
-				"ar	5, %5\n"
-				"1: \n"
-				"lr	%0,4\n"
-				"lr	%1,5\n"
-				: "=d" (return_code), "=d" (return_len)
-				: "a" (cpcmd_buf), "d" (cmdlen),
-				"a" (response), "d" (rlen), "m" (mask)
-				: "cc", "2", "3", "4", "5" );
+			"	diag	%2,%0,0x8\n"
+			"	brc	8,1f\n"
+			"	ar	%1,%4\n"
 #else /* CONFIG_64BIT */
-                asm volatile (	"lrag	2,0(%2)\n"
-				"lgr	4,%3\n"
-				"o	4,%6\n"
-				"lrag	3,0(%4)\n"
-				"lgr	5,%5\n"
-				"sam31\n"
-				"diag	2,4,0x8\n"
-				"sam64\n"
-				"brc	8, 1f\n"
-				"agr	5, %5\n"
-				"1: \n"
-				"lgr	%0,4\n"
-				"lgr	%1,5\n"
-				: "=d" (return_code), "=d" (return_len)
-				: "a" (cpcmd_buf), "d" (cmdlen),
-				"a" (response), "d" (rlen), "m" (mask)
-				: "cc", "2", "3", "4", "5" );
+			"	sam31\n"
+			"	diag	%2,%0,0x8\n"
+			"	sam64\n"
+			"	brc	8,1f\n"
+			"	agr	%1,%4\n"
 #endif /* CONFIG_64BIT */
+			"1:\n"
+			: "+d" (reg4), "+d" (reg5)
+			: "d" (reg2), "d" (reg3), "d" (rlen) : "cc");
+		return_code = (int) reg4;
+		return_len = (int) reg5;
                 EBCASC(response, rlen);
         } else {
+		register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf;
+		register unsigned long reg3 asm ("3") = cmdlen;
 		return_len = 0;
+		asm volatile(
 #ifndef CONFIG_64BIT
-                asm volatile (	"lra	2,0(%1)\n"
-				"lr	3,%2\n"
-				"diag	2,3,0x8\n"
-				"lr	%0,3\n"
-				: "=d" (return_code)
-				: "a" (cpcmd_buf), "d" (cmdlen)
-				: "2", "3"  );
+			"	diag	%1,%0,0x8\n"
 #else /* CONFIG_64BIT */
-                asm volatile (	"lrag	2,0(%1)\n"
-				"lgr	3,%2\n"
-				"sam31\n"
-				"diag	2,3,0x8\n"
-				"sam64\n"
-				"lgr	%0,3\n"
-				: "=d" (return_code)
-				: "a" (cpcmd_buf), "d" (cmdlen)
-				: "2", "3" );
+			"	sam31\n"
+			"	diag	%1,%0,0x8\n"
+			"	sam64\n"
 #endif /* CONFIG_64BIT */
+			: "+d" (reg3) : "d" (reg2) : "cc");
+		return_code = (int) reg3;
         }
 	spin_unlock_irqrestore(&cpcmd_lock, flags);
 	if (response_code != NULL)
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 6555cc4..1f5e782 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -120,24 +120,15 @@
 
 static int diag308(unsigned long subcode, void *addr)
 {
-	register unsigned long _addr asm("0") = (unsigned long)addr;
+	register unsigned long _addr asm("0") = (unsigned long) addr;
 	register unsigned long _rc asm("1") = 0;
 
-	asm volatile (
-		"   diag %0,%2,0x308\n"
-		"0: \n"
-		".section __ex_table,\"a\"\n"
-#ifdef CONFIG_64BIT
-		"   .align 8\n"
-		"   .quad 0b, 0b\n"
-#else
-		"   .align 4\n"
-		"   .long 0b, 0b\n"
-#endif
-		".previous\n"
+	asm volatile(
+		"	diag	%0,%2,0x308\n"
+		"0:\n"
+		EX_TABLE(0b,0b)
 		: "+d" (_addr), "+d" (_rc)
-		: "d" (subcode) : "cc", "memory" );
-
+		: "d" (subcode) : "cc", "memory");
 	return _rc;
 }
 
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index d3cbfa3..6603fbb 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -45,7 +45,7 @@
 #include <asm/irq.h>
 #include <asm/timer.h>
 
-asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
 
 /*
  * Return saved PC of a blocked thread. used in kernel/sched.
@@ -177,7 +177,8 @@
 
 extern void kernel_thread_starter(void);
 
-__asm__(".align 4\n"
+asm(
+	".align 4\n"
 	"kernel_thread_starter:\n"
 	"    la    2,0(10)\n"
 	"    basr  14,9\n"
diff --git a/arch/s390/kernel/semaphore.c b/arch/s390/kernel/semaphore.c
index 8dfb690..191303f 100644
--- a/arch/s390/kernel/semaphore.c
+++ b/arch/s390/kernel/semaphore.c
@@ -26,17 +26,17 @@
 {
 	int old_val, new_val;
 
-        __asm__ __volatile__("   l     %0,0(%3)\n"
-                             "0: ltr   %1,%0\n"
-			     "   jhe   1f\n"
-			     "   lhi   %1,0\n"
-			     "1: ar    %1,%4\n"
-                             "   cs    %0,%1,0(%3)\n"
-                             "   jl    0b\n"
-                             : "=&d" (old_val), "=&d" (new_val),
-			       "=m" (sem->count)
-			     : "a" (&sem->count), "d" (incr), "m" (sem->count)
-			     : "cc" );
+	asm volatile(
+		"	l	%0,0(%3)\n"
+		"0:	ltr	%1,%0\n"
+		"	jhe	1f\n"
+		"	lhi	%1,0\n"
+		"1:	ar	%1,%4\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b\n"
+		: "=&d" (old_val), "=&d" (new_val), "=m" (sem->count)
+		: "a" (&sem->count), "d" (incr), "m" (sem->count)
+		: "cc");
 	return old_val;
 }
 
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index e3d9325..a21cfbb 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -101,7 +101,7 @@
         /*
          * Store processor id in lowcore (used e.g. in timer_interrupt)
          */
-        asm volatile ("stidp %0": "=m" (S390_lowcore.cpu_data.cpu_id));
+	asm volatile("stidp %0": "=m" (S390_lowcore.cpu_data.cpu_id));
         S390_lowcore.cpu_data.cpu_addr = addr;
 
         /*
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index b2e6f4c..a8e6199 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -63,7 +63,7 @@
 static void smp_ext_bitcall_others(ec_bit_sig);
 
 /*
- * Structure and data for smp_call_function(). This is designed to minimise
+ * Structure and data for smp_call_function(). This is designed to minimise
  * static memory requirements. It also looks cleaner.
  */
 static DEFINE_SPINLOCK(call_lock);
@@ -418,59 +418,49 @@
 /*
  * parameter area for the set/clear control bit callbacks
  */
-typedef struct
-{
-	__u16 start_ctl;
-	__u16 end_ctl;
+struct ec_creg_mask_parms {
 	unsigned long orvals[16];
 	unsigned long andvals[16];
-} ec_creg_mask_parms;
+};
 
 /*
  * callback for setting/clearing control bits
  */
 void smp_ctl_bit_callback(void *info) {
-	ec_creg_mask_parms *pp;
+	struct ec_creg_mask_parms *pp = info;
 	unsigned long cregs[16];
 	int i;
 	
-	pp = (ec_creg_mask_parms *) info;
-	__ctl_store(cregs[pp->start_ctl], pp->start_ctl, pp->end_ctl);
-	for (i = pp->start_ctl; i <= pp->end_ctl; i++)
+	__ctl_store(cregs, 0, 15);
+	for (i = 0; i <= 15; i++)
 		cregs[i] = (cregs[i] & pp->andvals[i]) | pp->orvals[i];
-	__ctl_load(cregs[pp->start_ctl], pp->start_ctl, pp->end_ctl);
+	__ctl_load(cregs, 0, 15);
 }
 
 /*
  * Set a bit in a control register of all cpus
  */
-void smp_ctl_set_bit(int cr, int bit) {
-        ec_creg_mask_parms parms;
+void smp_ctl_set_bit(int cr, int bit)
+{
+	struct ec_creg_mask_parms parms;
 
-	parms.start_ctl = cr;
-	parms.end_ctl = cr;
+	memset(&parms.orvals, 0, sizeof(parms.orvals));
+	memset(&parms.andvals, 0xff, sizeof(parms.andvals));
 	parms.orvals[cr] = 1 << bit;
-	parms.andvals[cr] = -1L;
-	preempt_disable();
-	smp_call_function(smp_ctl_bit_callback, &parms, 0, 1);
-        __ctl_set_bit(cr, bit);
-	preempt_enable();
+	on_each_cpu(smp_ctl_bit_callback, &parms, 0, 1);
 }
 
 /*
  * Clear a bit in a control register of all cpus
  */
-void smp_ctl_clear_bit(int cr, int bit) {
-        ec_creg_mask_parms parms;
+void smp_ctl_clear_bit(int cr, int bit)
+{
+	struct ec_creg_mask_parms parms;
 
-	parms.start_ctl = cr;
-	parms.end_ctl = cr;
-	parms.orvals[cr] = 0;
+	memset(&parms.orvals, 0, sizeof(parms.orvals));
+	memset(&parms.andvals, 0xff, sizeof(parms.andvals));
 	parms.andvals[cr] = ~(1L << bit);
-	preempt_disable();
-	smp_call_function(smp_ctl_bit_callback, &parms, 0, 1);
-        __ctl_clear_bit(cr, bit);
-	preempt_enable();
+	on_each_cpu(smp_ctl_bit_callback, &parms, 0, 1);
 }
 
 /*
@@ -650,9 +640,9 @@
 	sf->gprs[9] = (unsigned long) sf;
 	cpu_lowcore->save_area[15] = (unsigned long) sf;
 	__ctl_store(cpu_lowcore->cregs_save_area[0], 0, 15);
-	__asm__ __volatile__("stam  0,15,0(%0)"
-			     : : "a" (&cpu_lowcore->access_regs_save_area)
-			     : "memory");
+	asm volatile(
+		"	stam	0,15,0(%0)"
+		: : "a" (&cpu_lowcore->access_regs_save_area) : "memory");
 	cpu_lowcore->percpu_offset = __per_cpu_offset[cpu];
         cpu_lowcore->current_task = (unsigned long) idle;
         cpu_lowcore->cpu_data.cpu_nr = cpu;
@@ -708,7 +698,7 @@
 __cpu_disable(void)
 {
 	unsigned long flags;
-	ec_creg_mask_parms cr_parms;
+	struct ec_creg_mask_parms cr_parms;
 	int cpu = smp_processor_id();
 
 	spin_lock_irqsave(&smp_reserve_lock, flags);
@@ -724,30 +714,21 @@
 		pfault_fini();
 #endif
 
-	/* disable all external interrupts */
+	memset(&cr_parms.orvals, 0, sizeof(cr_parms.orvals));
+	memset(&cr_parms.andvals, 0xff, sizeof(cr_parms.andvals));
 
-	cr_parms.start_ctl = 0;
-	cr_parms.end_ctl = 0;
+	/* disable all external interrupts */
 	cr_parms.orvals[0] = 0;
 	cr_parms.andvals[0] = ~(1<<15 | 1<<14 | 1<<13 | 1<<12 |
 				1<<11 | 1<<10 | 1<< 6 | 1<< 4);
-	smp_ctl_bit_callback(&cr_parms);
-
 	/* disable all I/O interrupts */
-
-	cr_parms.start_ctl = 6;
-	cr_parms.end_ctl = 6;
 	cr_parms.orvals[6] = 0;
 	cr_parms.andvals[6] = ~(1<<31 | 1<<30 | 1<<29 | 1<<28 |
 				1<<27 | 1<<26 | 1<<25 | 1<<24);
-	smp_ctl_bit_callback(&cr_parms);
-
 	/* disable most machine checks */
-
-	cr_parms.start_ctl = 14;
-	cr_parms.end_ctl = 14;
 	cr_parms.orvals[14] = 0;
 	cr_parms.andvals[14] = ~(1<<28 | 1<<27 | 1<<26 | 1<<25 | 1<<24);
+
 	smp_ctl_bit_callback(&cr_parms);
 
 	spin_unlock_irqrestore(&smp_reserve_lock, flags);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 74e6178..1981c61 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -351,10 +351,12 @@
 	int cc;
 
         /* kick the TOD clock */
-        asm volatile ("STCK 0(%1)\n\t"
-                      "IPM  %0\n\t"
-                      "SRL  %0,28" : "=r" (cc) : "a" (&init_timer_cc) 
-				   : "memory", "cc");
+	asm volatile(
+		"	stck	0(%2)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28"
+		: "=d" (cc), "=m" (init_timer_cc)
+		: "a" (&init_timer_cc) : "cc");
         switch (cc) {
         case 0: /* clock in set state: all is fine */
                 break;
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index c4982c9..3eb4fab 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -597,8 +597,7 @@
 		local_irq_enable();
 
 	if (MACHINE_HAS_IEEE)
-		__asm__ volatile ("stfpc %0\n\t" 
-				  : "=m" (current->thread.fp_regs.fpc));
+		asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
 
 #ifdef CONFIG_MATHEMU
         else if (regs->psw.mask & PSW_MASK_PSTATE) {
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 468f4ea..027c474 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -27,9 +27,7 @@
          * yield the megahertz number of the cpu. The important function
          * is udelay and that is done using the tod clock. -- martin.
          */
-        __asm__ __volatile__(
-                "0: brct %0,0b"
-                : /* no outputs */ : "r" ((loops/2) + 1));
+	asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1));
 }
 
 /*
@@ -38,13 +36,12 @@
  */
 void __udelay(unsigned long usecs)
 {
-        uint64_t start_cc, end_cc;
+	uint64_t start_cc;
 
         if (usecs == 0)
                 return;
-        asm volatile ("STCK %0" : "=m" (start_cc));
+	start_cc = get_clock();
         do {
 		cpu_relax();
-                asm volatile ("STCK %0" : "=m" (end_cc));
-        } while (((end_cc - start_cc)/4096) < usecs);
+	} while (((get_clock() - start_cc)/4096) < usecs);
 }
diff --git a/arch/s390/math-emu/math.c b/arch/s390/math-emu/math.c
index b4957c8..6b9aec5 100644
--- a/arch/s390/math-emu/math.c
+++ b/arch/s390/math-emu/math.c
@@ -1564,52 +1564,52 @@
 }
 
 static inline void emu_load_regd(int reg) {
-        if ((reg&9) != 0)         /* test if reg in {0,2,4,6} */
+	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
                 return;
-        asm volatile (            /* load reg from fp_regs.fprs[reg] */
-                "     bras  1,0f\n"
-                "     ld    0,0(%1)\n"
-                "0:   ex    %0,0(1)"
-                : /* no output */
-                : "a" (reg<<4),"a" (&current->thread.fp_regs.fprs[reg].d)
-                : "1" );
+	asm volatile(		/* load reg from fp_regs.fprs[reg] */
+		"	bras	1,0f\n"
+		"	ld	0,0(%1)\n"
+		"0:	ex	%0,0(1)"
+		: /* no output */
+		: "a" (reg<<4),"a" (&current->thread.fp_regs.fprs[reg].d)
+		: "1");
 }
 
 static inline void emu_load_rege(int reg) {
-        if ((reg&9) != 0)         /* test if reg in {0,2,4,6} */
+	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
                 return;
-        asm volatile (            /* load reg from fp_regs.fprs[reg] */
-                "     bras  1,0f\n"
-                "     le    0,0(%1)\n"
-                "0:   ex    %0,0(1)"
-                : /* no output */
-                : "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].f)
-                : "1" );
+	asm volatile(		/* load reg from fp_regs.fprs[reg] */
+		"	bras	1,0f\n"
+		"	le	0,0(%1)\n"
+		"0:	ex	%0,0(1)"
+		: /* no output */
+		: "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].f)
+		: "1");
 }
 
 static inline void emu_store_regd(int reg) {
-        if ((reg&9) != 0)         /* test if reg in {0,2,4,6} */
+	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
                 return;
-        asm volatile (            /* store reg to fp_regs.fprs[reg] */
-                "     bras  1,0f\n"
-                "     std   0,0(%1)\n"
-                "0:   ex    %0,0(1)"
-                : /* no output */
-                : "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].d)
-                : "1" );
+	asm volatile(		/* store reg to fp_regs.fprs[reg] */
+		"	bras	1,0f\n"
+		"	std	0,0(%1)\n"
+		"0:	ex	%0,0(1)"
+		: /* no output */
+		: "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].d)
+		: "1");
 }
 
 
 static inline void emu_store_rege(int reg) {
-        if ((reg&9) != 0)         /* test if reg in {0,2,4,6} */
+	if ((reg&9) != 0)	/* test if reg in {0,2,4,6} */
                 return;
-        asm volatile (            /* store reg to fp_regs.fprs[reg] */
-                "     bras  1,0f\n"
-                "     ste   0,0(%1)\n"
-                "0:   ex    %0,0(1)"
-                : /* no output */
-                : "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].f)
-                : "1" );
+	asm volatile(		/* store reg to fp_regs.fprs[reg] */
+		"	bras	1,0f\n"
+		"	ste	0,0(%1)\n"
+		"0:	ex	%0,0(1)"
+		: /* no output */
+		: "a" (reg<<4), "a" (&current->thread.fp_regs.fprs[reg].f)
+		: "1");
 }
 
 int math_emu_b3(__u8 *opcode, struct pt_regs * regs) {
@@ -2089,23 +2089,22 @@
 
         if ((opc & 0x90) == 0) {           /* test if rx in {0,2,4,6} */
                 /* we got an exception therfore ry can't be in {0,2,4,6} */
-                __asm__ __volatile (       /* load rx from fp_regs.fprs[ry] */
-                        "     bras  1,0f\n"
-                        "     ld    0,0(%1)\n"
-                        "0:   ex    %0,0(1)"
-                        : /* no output */
-                        : "a" (opc & 0xf0),
-                          "a" (&fp_regs->fprs[opc & 0xf].d)
-                        : "1" );
+		asm volatile(		/* load rx from fp_regs.fprs[ry] */
+			"	bras	1,0f\n"
+			"	ld	0,0(%1)\n"
+			"0:	ex	%0,0(1)"
+			: /* no output */
+			: "a" (opc & 0xf0), "a" (&fp_regs->fprs[opc & 0xf].d)
+			: "1");
         } else if ((opc & 0x9) == 0) {     /* test if ry in {0,2,4,6} */
-                __asm__ __volatile (       /* store ry to fp_regs.fprs[rx] */
-                        "     bras  1,0f\n"
-                        "     std   0,0(%1)\n"
-                        "0:   ex    %0,0(1)"
-                        : /* no output */
-                        : "a" ((opc & 0xf) << 4),
-                          "a" (&fp_regs->fprs[(opc & 0xf0)>>4].d)
-                        : "1" );
+		asm volatile (		/* store ry to fp_regs.fprs[rx] */
+			"	bras	1,0f\n"
+			"	std	0,0(%1)\n"
+			"0:	ex	%0,0(1)"
+			: /* no output */
+			: "a" ((opc & 0xf) << 4),
+			  "a" (&fp_regs->fprs[(opc & 0xf0)>>4].d)
+			: "1");
         } else  /* move fp_regs.fprs[ry] to fp_regs.fprs[rx] */
                 fp_regs->fprs[(opc & 0xf0) >> 4] = fp_regs->fprs[opc & 0xf];
 	return 0;
@@ -2120,23 +2119,22 @@
 
         if ((opc & 0x90) == 0) {           /* test if rx in {0,2,4,6} */
                 /* we got an exception therfore ry can't be in {0,2,4,6} */
-                __asm__ __volatile (       /* load rx from fp_regs.fprs[ry] */
-                        "     bras  1,0f\n"
-                        "     le    0,0(%1)\n"
-                        "0:   ex    %0,0(1)"
-                        : /* no output */
-                        : "a" (opc & 0xf0),
-                          "a" (&fp_regs->fprs[opc & 0xf].f)
-                        : "1" );
+		asm volatile(		/* load rx from fp_regs.fprs[ry] */
+			"	bras	1,0f\n"
+			"	le	0,0(%1)\n"
+			"0:	ex	%0,0(1)"
+			: /* no output */
+			: "a" (opc & 0xf0), "a" (&fp_regs->fprs[opc & 0xf].f)
+			: "1");
         } else if ((opc & 0x9) == 0) {     /* test if ry in {0,2,4,6} */
-                __asm__ __volatile (       /* store ry to fp_regs.fprs[rx] */
-                        "     bras  1,0f\n"
-                        "     ste   0,0(%1)\n"
-                        "0:   ex    %0,0(1)"
-                        : /* no output */
-                        : "a" ((opc & 0xf) << 4),
-                          "a" (&fp_regs->fprs[(opc & 0xf0) >> 4].f)
-                        : "1" );
+		asm volatile(		/* store ry to fp_regs.fprs[rx] */
+			"	bras	1,0f\n"
+			"	ste	0,0(%1)\n"
+			"0:	ex	%0,0(1)"
+			: /* no output */
+			: "a" ((opc & 0xf) << 4),
+			  "a" (&fp_regs->fprs[(opc & 0xf0) >> 4].f)
+			: "1");
         } else  /* move fp_regs.fprs[ry] to fp_regs.fprs[rx] */
                 fp_regs->fprs[(opc & 0xf0) >> 4] = fp_regs->fprs[opc & 0xf];
 	return 0;
diff --git a/arch/s390/math-emu/sfp-util.h b/arch/s390/math-emu/sfp-util.h
index ab556b6..5b6ca45 100644
--- a/arch/s390/math-emu/sfp-util.h
+++ b/arch/s390/math-emu/sfp-util.h
@@ -4,48 +4,51 @@
 #include <asm/byteorder.h>
 
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) ({		\
-        unsigned int __sh = (ah);			\
-        unsigned int __sl = (al);			\
-        __asm__ ("   alr  %1,%3\n"			\
-                 "   brc  12,0f\n"			\
-                 "   ahi  %0,1\n"			\
-                 "0: alr  %0,%2"			\
-                 : "+&d" (__sh), "+d" (__sl)		\
-                 : "d" (bh), "d" (bl) : "cc" );		\
-        (sh) = __sh;					\
-        (sl) = __sl;					\
+	unsigned int __sh = (ah);			\
+	unsigned int __sl = (al);			\
+	asm volatile(					\
+		"	alr	%1,%3\n"		\
+		"	brc	12,0f\n"		\
+		"	ahi	%0,1\n"			\
+		"0:	alr  %0,%2"			\
+		: "+&d" (__sh), "+d" (__sl)		\
+		: "d" (bh), "d" (bl) : "cc");		\
+	(sh) = __sh;					\
+	(sl) = __sl;					\
 })
 
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) ({		\
-       unsigned int __sh = (ah);			\
-       unsigned int __sl = (al);			\
-       __asm__ ("   slr  %1,%3\n"			\
-                "   brc  3,0f\n"			\
-                "   ahi  %0,-1\n"			\
-                "0: slr  %0,%2"				\
-                : "+&d" (__sh), "+d" (__sl)		\
-                : "d" (bh), "d" (bl) : "cc" );		\
-       (sh) = __sh;					\
-       (sl) = __sl;					\
+	unsigned int __sh = (ah);			\
+	unsigned int __sl = (al);			\
+	asm volatile(					\
+		"	slr	%1,%3\n"		\
+		"	brc	3,0f\n"			\
+		"	ahi	%0,-1\n"		\
+		"0:	slr	%0,%2"			\
+		: "+&d" (__sh), "+d" (__sl)		\
+		: "d" (bh), "d" (bl) : "cc");		\
+	(sh) = __sh;					\
+	(sl) = __sl;					\
 })
 
 /* a umul b = a mul b + (a>=2<<31) ? b<<32:0 + (b>=2<<31) ? a<<32:0 */
 #define umul_ppmm(wh, wl, u, v) ({			\
-        unsigned int __wh = u;				\
-        unsigned int __wl = v;				\
-        __asm__ ("   ltr  1,%0\n"			\
-                 "   mr   0,%1\n"			\
-                 "   jnm  0f\n"				\
-                 "   alr  0,%1\n"			\
-                 "0: ltr  %1,%1\n"			\
-                 "   jnm  1f\n"				\
-                 "   alr  0,%0\n"			\
-                 "1: lr   %0,0\n"			\
-                 "   lr   %1,1\n"			\
-                 : "+d" (__wh), "+d" (__wl)		\
-                 : : "0", "1", "cc" );			\
-        wh = __wh;					\
-        wl = __wl;					\
+	unsigned int __wh = u;				\
+	unsigned int __wl = v;				\
+	asm volatile(					\
+		"	ltr	1,%0\n"			\
+		"	mr	0,%1\n"			\
+		"	jnm	0f\n"				\
+		"	alr	0,%1\n"			\
+		"0:	ltr	%1,%1\n"			\
+		"	jnm	1f\n"				\
+		"	alr	0,%0\n"			\
+		"1:	lr	%0,0\n"			\
+		"	lr	%1,1\n"			\
+		: "+d" (__wh), "+d" (__wl)		\
+		: : "0", "1", "cc");			\
+	wh = __wh;					\
+	wl = __wl;					\
 })
 
 #define udiv_qrnnd(q, r, n1, n0, d)			\
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 9b11e3e..226275d 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -142,17 +142,17 @@
 
 	rx = (unsigned long) parameter;
 	ry = (unsigned long) func;
-	__asm__ __volatile__(
+	asm volatile(
 #ifdef CONFIG_64BIT
-		"   sam31\n" // switch to 31 bit
-		"   diag    %0,%1,0x64\n"
-		"   sam64\n" // switch back to 64 bit
+		"	sam31\n"
+		"	diag	%0,%1,0x64\n"
+		"	sam64\n"
 #else
-		"   diag    %0,%1,0x64\n"
+		"	diag	%0,%1,0x64\n"
 #endif
-		"   ipm     %2\n"
-		"   srl     %2,28\n"
-		: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc" );
+		"	ipm	%2\n"
+		"	srl	%2,28\n"
+		: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc");
 	*ret1 = rx;
 	*ret2 = ry;
 	return rc;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index a393c30..f2b9a84 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -424,20 +424,13 @@
 
 	if (pfault_disable)
 		return -1;
-        __asm__ __volatile__(
-                "    diag  %1,%0,0x258\n"
-		"0:  j     2f\n"
-		"1:  la    %0,8\n"
+	asm volatile(
+		"	diag	%1,%0,0x258\n"
+		"0:	j	2f\n"
+		"1:	la	%0,8\n"
 		"2:\n"
-		".section __ex_table,\"a\"\n"
-		"   .align 4\n"
-#ifndef CONFIG_64BIT
-		"   .long  0b,1b\n"
-#else /* CONFIG_64BIT */
-		"   .quad  0b,1b\n"
-#endif /* CONFIG_64BIT */
-		".previous"
-                : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc" );
+		EX_TABLE(0b,1b)
+		: "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc");
         __ctl_set_bit(0, 9);
         return rc;
 }
@@ -450,18 +443,11 @@
 	if (pfault_disable)
 		return;
 	__ctl_clear_bit(0,9);
-        __asm__ __volatile__(
-                "    diag  %0,0,0x258\n"
+	asm volatile(
+		"	diag	%0,0,0x258\n"
 		"0:\n"
-		".section __ex_table,\"a\"\n"
-		"   .align 4\n"
-#ifndef CONFIG_64BIT
-		"   .long  0b,0b\n"
-#else /* CONFIG_64BIT */
-		"   .quad  0b,0b\n"
-#endif /* CONFIG_64BIT */
-		".previous"
-		: : "a" (&refbk), "m" (refbk) : "cc" );
+		EX_TABLE(0b,0b)
+		: : "a" (&refbk), "m" (refbk) : "cc");
 }
 
 asmlinkage void
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index cfd9b8f..127044e 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -45,26 +45,17 @@
 {
         if (addr >= 0x7ff00000)
                 return;
+	asm volatile(
 #ifdef CONFIG_64BIT
-        asm volatile (
-		"   sam31\n"
-		"   diag %0,%0,0x10\n"
-		"0: sam64\n"
-		".section __ex_table,\"a\"\n"
-		"   .align 8\n"
-		"   .quad 0b, 0b\n"
-		".previous\n"
-		: : "a" (addr));
+		"	sam31\n"
+		"	diag	%0,%0,0x10\n"
+		"0:	sam64\n"
 #else
-        asm volatile (
-		"   diag %0,%0,0x10\n"
+		"	diag	%0,%0,0x10\n"
 		"0:\n"
-		".section __ex_table,\"a\"\n"
-		"   .align 4\n"
-		"   .long 0b, 0b\n"
-		".previous\n"
-		: : "a" (addr));
 #endif
+		EX_TABLE(0b,0b)
+		: : "a" (addr));
 }
 
 void show_mem(void)
@@ -156,11 +147,10 @@
 	S390_lowcore.kernel_asce = pgdir_k;
 
         /* enable virtual mapping in kernel mode */
-        __asm__ __volatile__("    LCTL  1,1,%0\n"
-                             "    LCTL  7,7,%0\n"
-                             "    LCTL  13,13,%0\n"
-                             "    SSM   %1" 
-			     : : "m" (pgdir_k), "m" (ssm_mask));
+	__ctl_load(pgdir_k, 1, 1);
+	__ctl_load(pgdir_k, 7, 7);
+	__ctl_load(pgdir_k, 13, 13);
+	__raw_local_irq_ssm(ssm_mask);
 
         local_flush_tlb();
         return;
@@ -241,11 +231,10 @@
 	S390_lowcore.kernel_asce = pgdir_k;
 
         /* enable virtual mapping in kernel mode */
-        __asm__ __volatile__("lctlg 1,1,%0\n\t"
-                             "lctlg 7,7,%0\n\t"
-                             "lctlg 13,13,%0\n\t"
-                             "ssm   %1"
-			     : :"m" (pgdir_k), "m" (ssm_mask));
+	__ctl_load(pgdir_k, 1, 1);
+	__ctl_load(pgdir_k, 7, 7);
+	__ctl_load(pgdir_k, 13, 13);
+	__raw_local_irq_ssm(ssm_mask);
 
         local_flush_tlb();
 
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 23fa0b2..9d051e5 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -63,44 +63,26 @@
  * and function code cmd.
  * In case of an exception return 3. Otherwise return result of bitwise OR of
  * resulting condition code and DIAG return code. */
-static __inline__ int
-dia250(void *iob, int cmd)
+static inline int dia250(void *iob, int cmd)
 {
+	register unsigned long reg0 asm ("0") = (unsigned long) iob;	/* iob address preloaded in r0, replaces the explicit lgr/lr 0,%3 */
 	typedef union {
 		struct dasd_diag_init_io init_io;
 		struct dasd_diag_rw_io rw_io;
 	} addr_type;
 	int rc;
 
-	__asm__ __volatile__(
-#ifdef CONFIG_64BIT
-		"	lghi	%0,3\n"
-		"	lgr	0,%3\n"
+	rc = 3;	/* exception result: kept when the ipm/srl/or sequence is skipped */
+	asm volatile(
 		"	diag	0,%2,0x250\n"
 		"0:	ipm	%0\n"
 		"	srl	%0,28\n"
 		"	or	%0,1\n"
 		"1:\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 8\n"
-		"	.quad  0b,1b\n"
-		".previous\n"
-#else
-		"	lhi	%0,3\n"
-		"	lr	0,%3\n"
-		"	diag	0,%2,0x250\n"
-		"0:	ipm	%0\n"
-		"	srl	%0,28\n"
-		"	or	%0,1\n"
-		"1:\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 4\n"
-		"	.long 0b,1b\n"
-		".previous\n"
-#endif
-		: "=&d" (rc), "=m" (*(addr_type *) iob)
-		: "d" (cmd), "d" (iob), "m" (*(addr_type *) iob)
-		: "0", "1", "cc");
+		EX_TABLE(0b,1b)	/* stands in for the open-coded __ex_table entry 0b,1b */
+		: "+d" (rc), "=m" (*(addr_type *) iob)	/* "+d": rc enters the asm already holding 3 */
+		: "d" (cmd), "d" (reg0), "m" (*(addr_type *) iob)
+		: "1", "cc");	/* r1 is read back by "or %0,1"; r0 is now an input, no longer a clobber */
 	return rc;
 }
 
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index cab2c73..a04d912 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -89,28 +89,15 @@
  */
 static int xpram_page_in (unsigned long page_addr, unsigned int xpage_index)
 {
-	int cc;
+	int cc = 2;	/* return unused cc 2 if pgin traps */
 
-	__asm__ __volatile__ (
-		"   lhi   %0,2\n"  /* return unused cc 2 if pgin traps */
-		"   .insn rre,0xb22e0000,%1,%2\n"  /* pgin %1,%2 */
-                "0: ipm   %0\n"
-		"   srl   %0,28\n"
+	asm volatile(
+		"	.insn	rre,0xb22e0000,%1,%2\n"  /* pgin %1,%2 */
+		"0:	ipm	%0\n"
+		"	srl	%0,28\n"
 		"1:\n"
-#ifndef CONFIG_64BIT
-		".section __ex_table,\"a\"\n"
-		"   .align 4\n"
-		"   .long  0b,1b\n"
-		".previous"
-#else
-                ".section __ex_table,\"a\"\n"
-                "   .align 8\n"
-                "   .quad 0b,1b\n"
-                ".previous"
-#endif
-		: "=&d" (cc) 
-		: "a" (__pa(page_addr)), "a" (xpage_index) 
-		: "cc" );
+		EX_TABLE(0b,1b)
+		: "+d" (cc) : "a" (__pa(page_addr)), "d" (xpage_index) : "cc");
 	if (cc == 3)
 		return -ENXIO;
 	if (cc == 2) {
@@ -137,28 +124,15 @@
  */
 static long xpram_page_out (unsigned long page_addr, unsigned int xpage_index)
 {
-	int cc;
+	int cc = 2;	/* return unused cc 2 if pgout traps */
 
-	__asm__ __volatile__ (
-		"   lhi   %0,2\n"  /* return unused cc 2 if pgout traps */
-		"   .insn rre,0xb22f0000,%1,%2\n"  /* pgout %1,%2 */
-                "0: ipm   %0\n"
-		"   srl   %0,28\n"
+	asm volatile(
+		"	.insn	rre,0xb22f0000,%1,%2\n"  /* pgout %1,%2 */
+		"0:	ipm	%0\n"
+		"	srl	%0,28\n"
 		"1:\n"
-#ifndef CONFIG_64BIT
-		".section __ex_table,\"a\"\n"
-		"   .align 4\n"
-		"   .long  0b,1b\n"
-		".previous"
-#else
-                ".section __ex_table,\"a\"\n"
-                "   .align 8\n"
-                "   .quad 0b,1b\n"
-                ".previous"
-#endif
-		: "=&d" (cc) 
-		: "a" (__pa(page_addr)), "a" (xpage_index) 
-		: "cc" );
+		EX_TABLE(0b,1b)
+		: "+d" (cc) : "a" (__pa(page_addr)), "d" (xpage_index) : "cc");
 	if (cc == 3)
 		return -ENXIO;
 	if (cc == 2) {
diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c
index 985d161..31e3357 100644
--- a/drivers/s390/char/sclp.c
+++ b/drivers/s390/char/sclp.c
@@ -100,13 +100,12 @@
 {
 	int cc;
 
-	__asm__ __volatile__(
-		"   .insn rre,0xb2200000,%1,%2\n"  /* servc %1,%2 */
-		"   ipm	  %0\n"
-		"   srl	  %0,28"
-		: "=&d" (cc)
-		: "d" (command), "a" (__pa(sccb))
-		: "cc", "memory" );
+	asm volatile(
+		"	.insn	rre,0xb2200000,%1,%2\n"  /* servc %1,%2 */
+		"	ipm	%0\n"
+		"	srl	%0,28"
+		: "=&d" (cc) : "d" (command), "a" (__pa(sccb))
+		: "cc", "memory");
 	if (cc == 3)
 		return -EIO;
 	if (cc == 2)
@@ -360,16 +359,6 @@
 	sclp_process_queue();
 }
 
-/* Return current Time-Of-Day clock. */
-static inline u64
-sclp_get_clock(void)
-{
-	u64 result;
-
-	asm volatile ("STCK 0(%1)" : "=m" (result) : "a" (&(result)) : "cc");
-	return result;
-}
-
 /* Convert interval in jiffies to TOD ticks. */
 static inline u64
 sclp_tod_from_jiffies(unsigned long jiffies)
@@ -382,7 +371,6 @@
 void
 sclp_sync_wait(void)
 {
-	unsigned long psw_mask;
 	unsigned long flags;
 	unsigned long cr0, cr0_sync;
 	u64 timeout;
@@ -392,7 +380,7 @@
 	timeout = 0;
 	if (timer_pending(&sclp_request_timer)) {
 		/* Get timeout TOD value */
-		timeout = sclp_get_clock() +
+		timeout = get_clock() +
 			  sclp_tod_from_jiffies(sclp_request_timer.expires -
 						jiffies);
 	}
@@ -406,13 +394,12 @@
 	cr0_sync |= 0x00000200;
 	cr0_sync &= 0xFFFFF3AC;
 	__ctl_load(cr0_sync, 0, 0);
-	asm volatile ("STOSM 0(%1),0x01"
-		      : "=m" (psw_mask) : "a" (&psw_mask) : "memory");
+	__raw_local_irq_stosm(0x01);
 	/* Loop until driver state indicates finished request */
 	while (sclp_running_state != sclp_running_state_idle) {
 		/* Check for expired request timer */
 		if (timer_pending(&sclp_request_timer) &&
-		    sclp_get_clock() > timeout &&
+		    get_clock() > timeout &&
 		    del_timer(&sclp_request_timer))
 			sclp_request_timer.function(sclp_request_timer.data);
 		barrier();
diff --git a/drivers/s390/char/vmwatchdog.c b/drivers/s390/char/vmwatchdog.c
index 807320a..4b868f7 100644
--- a/drivers/s390/char/vmwatchdog.c
+++ b/drivers/s390/char/vmwatchdog.c
@@ -54,48 +54,20 @@
 static int __diag288(enum vmwdt_func func, unsigned int timeout,
 			    char *cmd, size_t len)
 {
-	register unsigned long __func asm("2");
-	register unsigned long __timeout asm("3");
-	register unsigned long __cmdp asm("4");
-	register unsigned long __cmdl asm("5");
+	register unsigned long __func asm("2") = func;	/* operands stay pinned to r2..r5, now initialized at declaration */
+	register unsigned long __timeout asm("3") = timeout;
+	register unsigned long __cmdp asm("4") = virt_to_phys(cmd);
+	register unsigned long __cmdl asm("5") = len;
 	int err;
 
-	__func = func;
-	__timeout = timeout;
-	__cmdp = virt_to_phys(cmd);
-	__cmdl = len;
-	err = 0;
-	asm volatile (
-#ifdef CONFIG_64BIT
-		       "diag %2,%4,0x288\n"
-		"1:	\n"
-		".section .fixup,\"ax\"\n"
-		"2:	lghi %0,%1\n"
-		"	jg 1b\n"
-		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 8\n"
-		"	.quad 1b,2b\n"
-		".previous\n"
-#else
-		       "diag %2,%4,0x288\n"
-		"1:	\n"
-		".section .fixup,\"ax\"\n"
-		"2:	lhi %0,%1\n"
-		"	bras 1,3f\n"
-		"	.long 1b\n"
-		"3:	l 1,0(1)\n"
-		"	br 1\n"
-		".previous\n"
-		".section __ex_table,\"a\"\n"
-		"	.align 4\n"
-		"	.long 1b,2b\n"
-		".previous\n"
-#endif
-		: "+&d"(err)
-		: "i"(-EINVAL), "d"(__func), "d"(__timeout),
-		  "d"(__cmdp), "d"(__cmdl)
-		: "1", "cc");
+	err = -EINVAL;	/* NOTE(review): also supplied through the "0" (-EINVAL) input below - looks redundant, confirm */
+	asm volatile(
+		"	diag	%1,%3,0x288\n"
+		"0:	la	%0,0\n"	/* success path: clear the preset error code */
+		"1:\n"
+		EX_TABLE(0b,1b)	/* replaces the old .fixup/__ex_table pair; a trap resumes at 1b with err unchanged */
+		: "=d" (err) : "d"(__func), "d"(__timeout),
+		  "d"(__cmdp), "d"(__cmdl), "0" (-EINVAL) : "1", "cc");
 	return err;
 }
 
diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c
index 438db48..1398367 100644
--- a/drivers/s390/cio/device_id.c
+++ b/drivers/s390/cio/device_id.c
@@ -42,18 +42,15 @@
 	spin_lock_irqsave(&diag210_lock, flags);
 	diag210_tmp = *addr;
 
-	asm volatile (
-		"   lhi	  %0,-1\n"
-		"   sam31\n"
-		"   diag  %1,0,0x210\n"
-		"0: ipm	  %0\n"
-		"   srl	  %0,28\n"
-		"1: sam64\n"
-		".section __ex_table,\"a\"\n"
-		"    .align 8\n"
-		"    .quad 0b,1b\n"
-		".previous"
-		: "=&d" (ccode) : "a" (__pa(&diag210_tmp)) : "cc", "memory" );
+	asm volatile(
+		"	lhi	%0,-1\n"
+		"	sam31\n"
+		"	diag	%1,0,0x210\n"
+		"0:	ipm	%0\n"
+		"	srl	%0,28\n"
+		"1:	sam64\n"
+		EX_TABLE(0b,1b)
+		: "=&d" (ccode) : "a" (__pa(&diag210_tmp)) : "cc", "memory");
 
 	*addr = diag210_tmp;
 	spin_unlock_irqrestore(&diag210_lock, flags);
@@ -66,17 +63,14 @@
 {
 	int ccode;
 
-	asm volatile (
-		"   lhi	  %0,-1\n"
-		"   diag  %1,0,0x210\n"
-		"0: ipm	  %0\n"
-		"   srl	  %0,28\n"
+	asm volatile(
+		"	lhi	%0,-1\n"
+		"	diag	%1,0,0x210\n"
+		"0:	ipm	%0\n"
+		"	srl	%0,28\n"
 		"1:\n"
-		".section __ex_table,\"a\"\n"
-		"    .align 4\n"
-		"    .long 0b,1b\n"
-		".previous"
-		: "=&d" (ccode) : "a" (__pa(addr)) : "cc", "memory" );
+		EX_TABLE(0b,1b)
+		: "=&d" (ccode) : "a" (__pa(addr)) : "cc", "memory");
 
 	return ccode;
 }
diff --git a/drivers/s390/cio/ioasm.h b/drivers/s390/cio/ioasm.h
index 95a9462..ad6d829 100644
--- a/drivers/s390/cio/ioasm.h
+++ b/drivers/s390/cio/ioasm.h
@@ -25,106 +25,74 @@
 static inline int stsch(struct subchannel_id schid,
 			    volatile struct schib *addr)
 {
+	register struct subchannel_id reg1 asm ("1") = schid;
 	int ccode;
 
-	__asm__ __volatile__(
-		"   lr	  1,%1\n"
-		"   stsch 0(%2)\n"
-		"   ipm	  %0\n"
-		"   srl	  %0,28"
-		: "=d" (ccode)
-		: "d" (schid), "a" (addr), "m" (*addr)
-		: "cc", "1" );
+	asm volatile(
+		"	stsch	0(%2)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28"
+		: "=d" (ccode) : "d" (reg1), "a" (addr), "m" (*addr) : "cc");
 	return ccode;
 }
 
 static inline int stsch_err(struct subchannel_id schid,
 				volatile struct schib *addr)
 {
-	int ccode;
+	register struct subchannel_id reg1 asm ("1") = schid;
+	int ccode = -EIO;
 
-	__asm__ __volatile__(
-		"    lhi  %0,%3\n"
-		"    lr	  1,%1\n"
-		"    stsch 0(%2)\n"
-		"0:  ipm  %0\n"
-		"    srl  %0,28\n"
+	asm volatile(
+		"	stsch	0(%2)\n"
+		"0:	ipm	%0\n"
+		"	srl	%0,28\n"
 		"1:\n"
-#ifdef CONFIG_64BIT
-		".section __ex_table,\"a\"\n"
-		"   .align 8\n"
-		"   .quad 0b,1b\n"
-		".previous"
-#else
-		".section __ex_table,\"a\"\n"
-		"   .align 4\n"
-		"   .long 0b,1b\n"
-		".previous"
-#endif
-		: "=&d" (ccode)
-		: "d" (schid), "a" (addr), "K" (-EIO), "m" (*addr)
-		: "cc", "1" );
+		EX_TABLE(0b,1b)
+		: "+d" (ccode) : "d" (reg1), "a" (addr), "m" (*addr) : "cc");
 	return ccode;
 }
 
 static inline int msch(struct subchannel_id schid,
 			   volatile struct schib *addr)
 {
+	register struct subchannel_id reg1 asm ("1") = schid;
 	int ccode;
 
-	__asm__ __volatile__(
-		"   lr	  1,%1\n"
-		"   msch  0(%2)\n"
-		"   ipm	  %0\n"
-		"   srl	  %0,28"
-		: "=d" (ccode)
-		: "d" (schid), "a" (addr), "m" (*addr)
-		: "cc", "1" );
+	asm volatile(
+		"	msch	0(%2)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28"
+		: "=d" (ccode) : "d" (reg1), "a" (addr), "m" (*addr) : "cc");
 	return ccode;
 }
 
 static inline int msch_err(struct subchannel_id schid,
 			       volatile struct schib *addr)
 {
-	int ccode;
+	register struct subchannel_id reg1 asm ("1") = schid;
+	int ccode = -EIO;
 
-	__asm__ __volatile__(
-		"    lhi  %0,%3\n"
-		"    lr	  1,%1\n"
-		"    msch 0(%2)\n"
-		"0:  ipm  %0\n"
-		"    srl  %0,28\n"
+	asm volatile(
+		"	msch	0(%2)\n"
+		"0:	ipm	%0\n"
+		"	srl	%0,28\n"
 		"1:\n"
-#ifdef CONFIG_64BIT
-		".section __ex_table,\"a\"\n"
-		"   .align 8\n"
-		"   .quad 0b,1b\n"
-		".previous"
-#else
-		".section __ex_table,\"a\"\n"
-		"   .align 4\n"
-		"   .long 0b,1b\n"
-		".previous"
-#endif
-		: "=&d" (ccode)
-		: "d" (schid), "a" (addr), "K" (-EIO), "m" (*addr)
-		: "cc", "1" );
+		EX_TABLE(0b,1b)
+		: "+d" (ccode) : "d" (reg1), "a" (addr), "m" (*addr) : "cc");
 	return ccode;
 }
 
 static inline int tsch(struct subchannel_id schid,
 			   volatile struct irb *addr)
 {
+	register struct subchannel_id reg1 asm ("1") = schid;
 	int ccode;
 
-	__asm__ __volatile__(
-		"   lr	  1,%1\n"
-		"   tsch  0(%2)\n"
-		"   ipm	  %0\n"
-		"   srl	  %0,28"
-		: "=d" (ccode)
-		: "d" (schid), "a" (addr), "m" (*addr)
-		: "cc", "1" );
+	asm volatile(
+		"	tsch	0(%2)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28"
+		: "=d" (ccode) : "d" (reg1), "a" (addr), "m" (*addr) : "cc");
 	return ccode;
 }
 
@@ -132,89 +100,77 @@
 {
 	int ccode;
 
-	__asm__ __volatile__(
-		"   tpi	  0(%1)\n"
-		"   ipm	  %0\n"
-		"   srl	  %0,28"
-		: "=d" (ccode)
-		: "a" (addr), "m" (*addr)
-		: "cc", "1" );
+	asm volatile(
+		"	tpi	0(%1)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28"
+		: "=d" (ccode) : "a" (addr), "m" (*addr) : "cc");
 	return ccode;
 }
 
 static inline int ssch(struct subchannel_id schid,
 			   volatile struct orb *addr)
 {
+	register struct subchannel_id reg1 asm ("1") = schid;
 	int ccode;
 
-	__asm__ __volatile__(
-		"   lr	  1,%1\n"
-		"   ssch  0(%2)\n"
-		"   ipm	  %0\n"
-		"   srl	  %0,28"
-		: "=d" (ccode)
-		: "d" (schid), "a" (addr), "m" (*addr)
-		: "cc", "1" );
+	asm volatile(
+		"	ssch	0(%2)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28"
+		: "=d" (ccode) : "d" (reg1), "a" (addr), "m" (*addr) : "cc");
 	return ccode;
 }
 
 static inline int rsch(struct subchannel_id schid)
 {
+	register struct subchannel_id reg1 asm ("1") = schid;
 	int ccode;
 
-	__asm__ __volatile__(
-		"   lr	  1,%1\n"
-		"   rsch\n"
-		"   ipm	  %0\n"
-		"   srl	  %0,28"
-		: "=d" (ccode)
-		: "d" (schid)
-		: "cc", "1" );
+	asm volatile(
+		"	rsch\n"
+		"	ipm	%0\n"
+		"	srl	%0,28"
+		: "=d" (ccode) : "d" (reg1) : "cc");
 	return ccode;
 }
 
 static inline int csch(struct subchannel_id schid)
 {
+	register struct subchannel_id reg1 asm ("1") = schid;
 	int ccode;
 
-	__asm__ __volatile__(
-		"   lr	  1,%1\n"
-		"   csch\n"
-		"   ipm	  %0\n"
-		"   srl	  %0,28"
-		: "=d" (ccode)
-		: "d" (schid)
-		: "cc", "1" );
+	asm volatile(
+		"	csch\n"
+		"	ipm	%0\n"
+		"	srl	%0,28"
+		: "=d" (ccode) : "d" (reg1) : "cc");
 	return ccode;
 }
 
 static inline int hsch(struct subchannel_id schid)
 {
+	register struct subchannel_id reg1 asm ("1") = schid;
 	int ccode;
 
-	__asm__ __volatile__(
-		"   lr	  1,%1\n"
-		"   hsch\n"
-		"   ipm	  %0\n"
-		"   srl	  %0,28"
-		: "=d" (ccode)
-		: "d" (schid)
-		: "cc", "1" );
+	asm volatile(
+		"	hsch\n"
+		"	ipm	%0\n"
+		"	srl	%0,28"
+		: "=d" (ccode) : "d" (reg1) : "cc");
 	return ccode;
 }
 
 static inline int xsch(struct subchannel_id schid)
 {
+	register struct subchannel_id reg1 asm ("1") = schid;
 	int ccode;
 
-	__asm__ __volatile__(
-		"   lr	  1,%1\n"
-		"   .insn rre,0xb2760000,%1,0\n"
-		"   ipm	  %0\n"
-		"   srl	  %0,28"
-		: "=d" (ccode)
-		: "d" (schid)
-		: "cc", "1" );
+	asm volatile(
+		"	.insn	rre,0xb2760000,%1,0\n"
+		"	ipm	%0\n"
+		"	srl	%0,28"
+		: "=d" (ccode) : "d" (reg1) : "cc");
 	return ccode;
 }
 
@@ -223,41 +179,26 @@
 	typedef struct { char _[4096]; } addr_type;
 	int cc;
 
-	__asm__ __volatile__ (
-		".insn	rre,0xb25f0000,%2,0	\n\t"
-		"ipm	%0	\n\t"
-		"srl	%0,28	\n\t"
+	asm volatile(
+		"	.insn	rre,0xb25f0000,%2,0\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
 		: "=d" (cc), "=m" (*(addr_type *) chsc_area)
 		: "d" (chsc_area), "m" (*(addr_type *) chsc_area)
-		: "cc" );
-
+		: "cc");
 	return cc;
 }
 
-static inline int iac( void)
-{
-	int ccode;
-
-	__asm__ __volatile__(
-		"   iac	  1\n"
-		"   ipm	  %0\n"
-		"   srl	  %0,28"
-		: "=d" (ccode) : : "cc", "1" );
-	return ccode;
-}
-
 static inline int rchp(int chpid)
 {
+	register unsigned int reg1 asm ("1") = chpid;	/* rchp reads its operand from r1; the variable is bound there, so no lr is needed */
 	int ccode;
 
-	__asm__ __volatile__(
-		"   lr	  1,%1\n"
-		"   rchp\n"
-		"   ipm	  %0\n"
-		"   srl	  %0,28"
-		: "=d" (ccode)
-		: "d" (chpid)
-		: "cc", "1" );
+	asm volatile(
+		"	rchp\n"
+		"	ipm	%0\n"	/* fetch the condition code ... */
+		"	srl	%0,28"	/* ... and shift it down into the low bits of ccode */
+		: "=d" (ccode) : "d" (reg1) : "cc");	/* r1 is an input now, hence no longer in the clobber list */
 	return ccode;
 }
 
diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
index 1245693..49bb9e3 100644
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -274,12 +274,11 @@
        register unsigned long _sch asm ("1") = sch;
        unsigned long _queuestart = ((unsigned long)queue << 32) | *start;
 
-       asm volatile (
-              " .insn rsy,0xeb000000008A,%1,0,0(%2)\n\t"
-              : "+d" (_ccq), "+d" (_queuestart)
-              : "d" ((unsigned long)state), "d" (_sch)
-              : "memory", "cc"
-       );
+       asm volatile(
+	       "	.insn	rsy,0xeb000000008A,%1,0,0(%2)"
+	       : "+d" (_ccq), "+d" (_queuestart)
+	       : "d" ((unsigned long)state), "d" (_sch)
+	       : "memory", "cc");
        *count = _ccq & 0xff;
        *start = _queuestart & 0xff;
 
@@ -299,12 +298,11 @@
 	unsigned long _queuestart = ((unsigned long)queue << 32) | *start;
 	unsigned long _state = 0;
 
-	asm volatile (
-	      " .insn rrf,0xB99c0000,%1,%2,0,0  \n\t"
-	      : "+d" (_ccq), "+d" (_queuestart), "+d" (_state)
-	      : "d" (_sch)
-	      : "memory", "cc"
-	);
+	asm volatile(
+		"	.insn	rrf,0xB99c0000,%1,%2,0,0"
+		: "+d" (_ccq), "+d" (_queuestart), "+d" (_state)
+		: "d" (_sch)
+		: "memory", "cc" );
 	*count = _ccq & 0xff;
 	*start = _queuestart & 0xff;
 	*state = _state & 0xff;
@@ -319,69 +317,35 @@
 static inline int
 do_siga_sync(struct subchannel_id schid, unsigned int mask1, unsigned int mask2)
 {
+	register unsigned long reg0 asm ("0") = 2;
+	register struct subchannel_id reg1 asm ("1") = schid;
+	register unsigned long reg2 asm ("2") = mask1;
+	register unsigned long reg3 asm ("3") = mask2;
 	int cc;
 
-#ifndef CONFIG_64BIT
-	asm volatile (
-		"lhi	0,2	\n\t"
-		"lr	1,%1	\n\t"
-		"lr	2,%2	\n\t"
-		"lr	3,%3	\n\t"
-		"siga   0	\n\t"
-		"ipm	%0	\n\t"
-		"srl	%0,28	\n\t"
+	asm volatile(
+		"	siga	0\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
 		: "=d" (cc)
-		: "d" (schid), "d" (mask1), "d" (mask2)
-		: "cc", "0", "1", "2", "3"
-		);
-#else /* CONFIG_64BIT */
-	asm volatile (
-		"lghi	0,2	\n\t"
-		"llgfr	1,%1	\n\t"
-		"llgfr	2,%2	\n\t"
-		"llgfr	3,%3	\n\t"
-		"siga   0	\n\t"
-		"ipm	%0	\n\t"
-		"srl	%0,28	\n\t"
-		: "=d" (cc)
-		: "d" (schid), "d" (mask1), "d" (mask2)
-		: "cc", "0", "1", "2", "3"
-		);
-#endif /* CONFIG_64BIT */
+		: "d" (reg0), "d" (reg1), "d" (reg2), "d" (reg3) : "cc");
 	return cc;
 }
 
 static inline int
 do_siga_input(struct subchannel_id schid, unsigned int mask)
 {
+	register unsigned long reg0 asm ("0") = 1;
+	register struct subchannel_id reg1 asm ("1") = schid;
+	register unsigned long reg2 asm ("2") = mask;
 	int cc;
 
-#ifndef CONFIG_64BIT
-	asm volatile (
-		"lhi	0,1	\n\t"
-		"lr	1,%1	\n\t"
-		"lr	2,%2	\n\t"
-		"siga   0	\n\t"
-		"ipm	%0	\n\t"
-		"srl	%0,28	\n\t"
+	asm volatile(
+		"	siga	0\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
 		: "=d" (cc)
-		: "d" (schid), "d" (mask)
-		: "cc", "0", "1", "2", "memory"
-		);
-#else /* CONFIG_64BIT */
-	asm volatile (
-		"lghi	0,1	\n\t"
-		"llgfr	1,%1	\n\t"
-		"llgfr	2,%2	\n\t"
-		"siga   0	\n\t"
-		"ipm	%0	\n\t"
-		"srl	%0,28	\n\t"
-		: "=d" (cc)
-		: "d" (schid), "d" (mask)
-		: "cc", "0", "1", "2", "memory"
-		);
-#endif /* CONFIG_64BIT */
-	
+		: "d" (reg0), "d" (reg1), "d" (reg2) : "cc", "memory");
 	return cc;
 }
 
@@ -389,93 +353,35 @@
 do_siga_output(unsigned long schid, unsigned long mask, __u32 *bb,
 	       unsigned int fc)
 {
+	register unsigned long __fc asm("0") = fc;
+	register unsigned long __schid asm("1") = schid;
+	register unsigned long __mask asm("2") = mask;
 	int cc;
-	__u32 busy_bit;
 
-#ifndef CONFIG_64BIT
-	asm volatile (
-		"lhi	0,0	\n\t"
-		"lr	1,%2	\n\t"
-		"lr	2,%3	\n\t"
-		"siga	0	\n\t"
-		"0:"
-		"ipm	%0	\n\t"
-		"srl	%0,28	\n\t"
-		"srl	0,31	\n\t"
-		"lr	%1,0	\n\t"
-		"1:	\n\t"
-		".section .fixup,\"ax\"\n\t"
-		"2:	\n\t"
-		"lhi	%0,%4	\n\t"
-		"bras	1,3f	\n\t"
-		".long 1b	\n\t"
-		"3:	\n\t"
-		"l	1,0(1)	\n\t"
-		"br	1	\n\t"
-		".previous	\n\t"
-		".section __ex_table,\"a\"\n\t"
-		".align 4	\n\t"
-		".long	0b,2b	\n\t"
-		".previous	\n\t"
-		: "=d" (cc), "=d" (busy_bit)
-		: "d" (schid), "d" (mask),
-		"i" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION)
-		: "cc", "0", "1", "2", "memory"
-		);
-#else /* CONFIG_64BIT */
-	asm volatile (
-        	"llgfr  0,%5    \n\t"
-                "lgr    1,%2    \n\t"
-		"llgfr	2,%3	\n\t"
-		"siga	0	\n\t"
-		"0:"
-		"ipm	%0	\n\t"
-		"srl	%0,28	\n\t"
-		"srl	0,31	\n\t"
-		"llgfr	%1,0	\n\t"
-		"1:	\n\t"
-		".section .fixup,\"ax\"\n\t"
-		"lghi	%0,%4	\n\t"
-		"jg	1b	\n\t"
-		".previous\n\t"
-		".section __ex_table,\"a\"\n\t"
-		".align 8	\n\t"
-		".quad	0b,1b	\n\t"
-		".previous	\n\t"
-		: "=d" (cc), "=d" (busy_bit)
-		: "d" (schid), "d" (mask),
-		"i" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION), "d" (fc)
-		: "cc", "0", "1", "2", "memory"
-		);
-#endif /* CONFIG_64BIT */
-	
-	(*bb) = busy_bit;
+	asm volatile(
+		"	siga	0\n"
+		"0:	ipm	%0\n"
+		"	srl	%0,28\n"
+		"1:\n"
+		EX_TABLE(0b,1b)
+		: "=d" (cc), "+d" (__fc), "+d" (__schid), "+d" (__mask)
+		: "0" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION)
+		: "cc", "memory");
+	(*bb) = ((unsigned int) __fc) >> 31;
 	return cc;
 }
 
 static inline unsigned long
 do_clear_global_summary(void)
 {
+	register unsigned long __fn asm("1") = 3;	/* value 3 preloaded in r1, was "lhi 1,3" / "lghi 1,3" */
+	register unsigned long __tmp asm("2");	/* r2 is named in the .insn below and declared as an output */
+	register unsigned long __time asm("3");	/* result is taken from r3, was "lr %0,3" / "lgr %0,3" */
 
-	unsigned long time;
-
-#ifndef CONFIG_64BIT
-	asm volatile (
-		"lhi	1,3	\n\t"
-		".insn	rre,0xb2650000,2,0	\n\t"
-		"lr	%0,3	\n\t"
-		: "=d" (time) : : "cc", "1", "2", "3"
-		);
-#else /* CONFIG_64BIT */
-	asm volatile (
-		"lghi	1,3	\n\t"
-		".insn	rre,0xb2650000,2,0	\n\t"
-		"lgr	%0,3	\n\t"
-		: "=d" (time) : : "cc", "1", "2", "3"
-		);
-#endif /* CONFIG_64BIT */
-	
-	return time;
+	asm volatile(
+		"	.insn	rre,0xb2650000,2,0"
+		: "+d" (__fn), "=d" (__tmp), "=d" (__time));	/* NOTE(review): the old clobber list also had "cc" - confirm the insn leaves cc alone */
+	return __time;
 }
 	
 /*
diff --git a/drivers/s390/net/iucv.c b/drivers/s390/net/iucv.c
index 821dde8..809dd8d 100644
--- a/drivers/s390/net/iucv.c
+++ b/drivers/s390/net/iucv.c
@@ -534,19 +534,15 @@
  *
  * Returns: return code from CP's IUCV call
  */
-static __inline__ ulong
-b2f0(__u32 code, void *parm)
+static inline ulong b2f0(__u32 code, void *parm)
 {
+	register unsigned long reg0 asm ("0");
+	register unsigned long reg1 asm ("1");
 	iucv_dumpit("iparml before b2f0 call:", parm, sizeof(iucv_param));
 
-	asm volatile (
-		"LRA   1,0(%1)\n\t"
-		"LR    0,%0\n\t"
-		".long 0xb2f01000"
-		:
-		: "d" (code), "a" (parm)
-		: "0", "1"
-		);
+	reg0 = code;
+	reg1 = virt_to_phys(parm);
+	asm volatile(".long 0xb2f01000" : : "d" (reg0), "a" (reg1));
 
 	iucv_dumpit("iparml after b2f0 call:", parm, sizeof(iucv_param));
 
@@ -1248,6 +1244,8 @@
 static int
 iucv_query_generic(int want_maxconn)
 {
+	register unsigned long reg0 asm ("0");
+	register unsigned long reg1 asm ("1");
 	iparml_purge *parm = (iparml_purge *)grab_param();
 	int bufsize, maxconn;
 	int ccode;
@@ -1256,18 +1254,15 @@
 	 * Call b2f0 and store R0 (max buffer size),
 	 * R1 (max connections) and CC.
 	 */
-	asm volatile (
-		"LRA   1,0(%4)\n\t"
-		"LR    0,%3\n\t"
-		".long 0xb2f01000\n\t"
-		"IPM   %0\n\t"
-		"SRL   %0,28\n\t"
-		"ST    0,%1\n\t"
-		"ST    1,%2\n\t"
-		: "=d" (ccode), "=m" (bufsize), "=m" (maxconn)
-		: "d" (QUERY), "a" (parm)
-		: "0", "1", "cc"
-		);
+	reg0 = QUERY;
+	reg1 = virt_to_phys(parm);
+	asm volatile(
+		"	.long	0xb2f01000\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (ccode), "+d" (reg0), "+d" (reg1) : : "cc");
+	bufsize = reg0;
+	maxconn = reg1;
 	release_param(parm);
 
 	if (ccode)
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index a914129..479364d 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -253,11 +253,12 @@
 		kill_task = 1;
 
 #ifndef CONFIG_64BIT
-	asm volatile("ld 0,0(%0)\n"
-		     "ld 2,8(%0)\n"
-		     "ld 4,16(%0)\n"
-		     "ld 6,24(%0)"
-		     : : "a" (&S390_lowcore.floating_pt_save_area));
+	asm volatile(
+		"	ld	0,0(%0)\n"
+		"	ld	2,8(%0)\n"
+		"	ld	4,16(%0)\n"
+		"	ld	6,24(%0)"
+		: : "a" (&S390_lowcore.floating_pt_save_area));
 #endif
 
 	if (MACHINE_HAS_IEEE) {
@@ -274,37 +275,36 @@
 			 * Floating point control register can't be restored.
 			 * Task will be terminated.
 			 */
-			asm volatile ("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
+			asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
 			kill_task = 1;
 
-		}
-		else
-			asm volatile (
-				"lfpc 0(%0)"
-				: : "a" (fpt_creg_save_area));
+		} else
+			asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
 
-		asm volatile("ld  0,0(%0)\n"
-			     "ld  1,8(%0)\n"
-			     "ld  2,16(%0)\n"
-			     "ld  3,24(%0)\n"
-			     "ld  4,32(%0)\n"
-			     "ld  5,40(%0)\n"
-			     "ld  6,48(%0)\n"
-			     "ld  7,56(%0)\n"
-			     "ld  8,64(%0)\n"
-			     "ld  9,72(%0)\n"
-			     "ld 10,80(%0)\n"
-			     "ld 11,88(%0)\n"
-			     "ld 12,96(%0)\n"
-			     "ld 13,104(%0)\n"
-			     "ld 14,112(%0)\n"
-			     "ld 15,120(%0)\n"
-			     : : "a" (fpt_save_area));
+		asm volatile(
+			"	ld	0,0(%0)\n"
+			"	ld	1,8(%0)\n"
+			"	ld	2,16(%0)\n"
+			"	ld	3,24(%0)\n"
+			"	ld	4,32(%0)\n"
+			"	ld	5,40(%0)\n"
+			"	ld	6,48(%0)\n"
+			"	ld	7,56(%0)\n"
+			"	ld	8,64(%0)\n"
+			"	ld	9,72(%0)\n"
+			"	ld	10,80(%0)\n"
+			"	ld	11,88(%0)\n"
+			"	ld	12,96(%0)\n"
+			"	ld	13,104(%0)\n"
+			"	ld	14,112(%0)\n"
+			"	ld	15,120(%0)\n"
+			: : "a" (fpt_save_area));
 	}
 
 	/* Revalidate access registers */
-	asm volatile("lam 0,15,0(%0)"
-		     : : "a" (&S390_lowcore.access_regs_save_area));
+	asm volatile(
+		"	lam	0,15,0(%0)"
+		: : "a" (&S390_lowcore.access_regs_save_area));
 	if (!mci->ar)
 		/*
 		 * Access registers have unknown contents.
@@ -321,11 +321,13 @@
 		s390_handle_damage("invalid control registers.");
 	else
 #ifdef CONFIG_64BIT
-		asm volatile("lctlg 0,15,0(%0)"
-			     : : "a" (&S390_lowcore.cregs_save_area));
+		asm volatile(
+			"	lctlg	0,15,0(%0)"
+			: : "a" (&S390_lowcore.cregs_save_area));
 #else
-		asm volatile("lctl 0,15,0(%0)"
-			     : : "a" (&S390_lowcore.cregs_save_area));
+		asm volatile(
+			"	lctl	0,15,0(%0)"
+			: : "a" (&S390_lowcore.cregs_save_area));
 #endif
 
 	/*
@@ -339,20 +341,23 @@
 	 * old contents (should be zero) otherwise set it to zero.
 	 */
 	if (!mci->pr)
-		asm volatile("sr 0,0\n"
-			     "sckpf"
-			     : : : "0", "cc");
+		asm volatile(
+			"	sr	0,0\n"
+			"	sckpf"
+			: : : "0", "cc");
 	else
 		asm volatile(
-			"l 0,0(%0)\n"
-			"sckpf"
-			: : "a" (&S390_lowcore.tod_progreg_save_area) : "0", "cc");
+			"	l	0,0(%0)\n"
+			"	sckpf"
+			: : "a" (&S390_lowcore.tod_progreg_save_area)
+			: "0", "cc");
 #endif
 
 	/* Revalidate clock comparator register */
-	asm volatile ("stck 0(%1)\n"
-		      "sckc 0(%1)"
-		      : "=m" (tmpclock) : "a" (&(tmpclock)) : "cc", "memory");
+	asm volatile(
+		"	stck	0(%1)\n"
+		"	sckc	0(%1)"
+		: "=m" (tmpclock) : "a" (&(tmpclock)) : "cc", "memory");
 
 	/* Check if old PSW is valid */
 	if (!mci->wp)
diff --git a/include/asm-s390/appldata.h b/include/asm-s390/appldata.h
index b177070..79283da 100644
--- a/include/asm-s390/appldata.h
+++ b/include/asm-s390/appldata.h
@@ -80,7 +80,7 @@
 	parm_list.product_id_addr = (unsigned long) id;
 	parm_list.buffer_addr = virt_to_phys(buffer);
 	asm volatile(
-		"diag %1,%0,0xdc"
+		"	diag	%1,%0,0xdc"
 		: "=d" (ry)
 		: "d" (&parm_list), "m" (parm_list), "m" (*id)
 		: "cc");
diff --git a/include/asm-s390/atomic.h b/include/asm-s390/atomic.h
index 399bf02..af20c74 100644
--- a/include/asm-s390/atomic.h
+++ b/include/asm-s390/atomic.h
@@ -30,20 +30,43 @@
 
 #ifdef __KERNEL__
 
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+
 #define __CS_LOOP(ptr, op_val, op_string) ({				\
 	typeof(ptr->counter) old_val, new_val;				\
-        __asm__ __volatile__("   l     %0,0(%3)\n"			\
-                             "0: lr    %1,%0\n"				\
-                             op_string "  %1,%4\n"			\
-                             "   cs    %0,%1,0(%3)\n"			\
-                             "   jl    0b"				\
-                             : "=&d" (old_val), "=&d" (new_val),	\
-			       "=m" (((atomic_t *)(ptr))->counter)	\
-			     : "a" (ptr), "d" (op_val),			\
-			       "m" (((atomic_t *)(ptr))->counter)	\
-			     : "cc", "memory" );			\
+	asm volatile(							\
+		"	l	%0,%2\n"				\
+		"0:	lr	%1,%0\n"				\
+		op_string "	%1,%3\n"				\
+		"	cs	%0,%1,%2\n"				\
+		"	jl	0b"					\
+		: "=&d" (old_val), "=&d" (new_val),			\
+		  "=Q" (((atomic_t *)(ptr))->counter)			\
+		: "d" (op_val),	 "Q" (((atomic_t *)(ptr))->counter)	\
+		: "cc", "memory");					\
 	new_val;							\
 })
+
+#else /* __GNUC__ */
+
+#define __CS_LOOP(ptr, op_val, op_string) ({				\
+	typeof(ptr->counter) old_val, new_val;				\
+	asm volatile(							\
+		"	l	%0,0(%3)\n"				\
+		"0:	lr	%1,%0\n"				\
+		op_string "	%1,%4\n"				\
+		"	cs	%0,%1,0(%3)\n"				\
+		"	jl	0b"					\
+		: "=&d" (old_val), "=&d" (new_val),			\
+		  "=m" (((atomic_t *)(ptr))->counter)			\
+		: "a" (ptr), "d" (op_val),				\
+		  "m" (((atomic_t *)(ptr))->counter)			\
+		: "cc", "memory");					\
+	new_val;							\
+})
+
+#endif /* __GNUC__ */
+
 #define atomic_read(v)          ((v)->counter)
 #define atomic_set(v,i)         (((v)->counter) = (i))
 
@@ -81,10 +104,19 @@
 
 static __inline__ int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
-	__asm__ __volatile__("  cs   %0,%3,0(%2)\n"
-			     : "+d" (old), "=m" (v->counter)
-			     : "a" (v), "d" (new), "m" (v->counter)
-			     : "cc", "memory" );
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+	asm volatile(
+		"	cs	%0,%2,%1"
+		: "+d" (old), "=Q" (v->counter)
+		: "d" (new), "Q" (v->counter)
+		: "cc", "memory");
+#else /* __GNUC__ */
+	asm volatile(
+		"	cs	%0,%3,0(%2)"
+		: "+d" (old), "=m" (v->counter)
+		: "a" (v), "d" (new), "m" (v->counter)
+		: "cc", "memory");
+#endif /* __GNUC__ */
 	return old;
 }
 
@@ -113,20 +145,43 @@
 } __attribute__ ((aligned (8))) atomic64_t;
 #define ATOMIC64_INIT(i)  { (i) }
 
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+
 #define __CSG_LOOP(ptr, op_val, op_string) ({				\
 	typeof(ptr->counter) old_val, new_val;				\
-        __asm__ __volatile__("   lg    %0,0(%3)\n"			\
-                             "0: lgr   %1,%0\n"				\
-                             op_string "  %1,%4\n"			\
-                             "   csg   %0,%1,0(%3)\n"			\
-                             "   jl    0b"				\
-                             : "=&d" (old_val), "=&d" (new_val),	\
-			       "=m" (((atomic_t *)(ptr))->counter)	\
-			     : "a" (ptr), "d" (op_val),			\
-			       "m" (((atomic_t *)(ptr))->counter)	\
-			     : "cc", "memory" );			\
+	asm volatile(							\
+		"	lg	%0,%2\n"	/* old = *ptr */	\
+		"0:	lgr	%1,%0\n"	/* new = old */		\
+		op_string "	%1,%3\n"	/* new op= op_val */	\
+		"	csg	%0,%1,%2\n"	/* swap if unchanged */	\
+		"	jl	0b"		/* else retry */	\
+		: "=&d" (old_val), "=&d" (new_val),			\
+		  "=Q" (((atomic64_t *)(ptr))->counter)			\
+		: "d" (op_val),	"Q" (((atomic64_t *)(ptr))->counter)	\
+		: "cc", "memory");					\
 	new_val;							\
 })
+
+#else /* __GNUC__ */
+
+#define __CSG_LOOP(ptr, op_val, op_string) ({				\
+	typeof(ptr->counter) old_val, new_val;				\
+	asm volatile(							\
+		"	lg	%0,0(%3)\n"	/* old = *ptr */	\
+		"0:	lgr	%1,%0\n"	/* new = old */		\
+		op_string "	%1,%4\n"	/* new op= op_val */	\
+		"	csg	%0,%1,0(%3)\n"	/* swap if unchanged */	\
+		"	jl	0b"		/* else retry */	\
+		: "=&d" (old_val), "=&d" (new_val),			\
+		  "=m" (((atomic64_t *)(ptr))->counter)			\
+		: "a" (ptr), "d" (op_val),				\
+		  "m" (((atomic64_t *)(ptr))->counter)			\
+		: "cc", "memory");					\
+	new_val;							\
+})
+
+#endif /* __GNUC__ */
+
 #define atomic64_read(v)          ((v)->counter)
 #define atomic64_set(v,i)         (((v)->counter) = (i))
 
@@ -163,10 +218,19 @@
 static __inline__ long long atomic64_cmpxchg(atomic64_t *v,
 					     long long old, long long new)
 {
-	__asm__ __volatile__("  csg  %0,%3,0(%2)\n"
-			     : "+d" (old), "=m" (v->counter)
-			     : "a" (v), "d" (new), "m" (v->counter)
-			     : "cc", "memory" );
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+	asm volatile(
+		"	csg	%0,%2,%1"
+		: "+d" (old), "=Q" (v->counter)
+		: "d" (new), "Q" (v->counter)
+		: "cc", "memory");
+#else /* __GNUC__ */
+	asm volatile(
+		"	csg	%0,%3,0(%2)"
+		: "+d" (old), "=m" (v->counter)
+		: "a" (v), "d" (new), "m" (v->counter)
+		: "cc", "memory");
+#endif /* __GNUC__ */
 	return old;
 }
 
diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h
index 0ddcdba..f79c9b7 100644
--- a/include/asm-s390/bitops.h
+++ b/include/asm-s390/bitops.h
@@ -67,16 +67,35 @@
 #define __BITOPS_AND		"nr"
 #define __BITOPS_XOR		"xr"
 
-#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string)		\
-	__asm__ __volatile__("   l   %0,0(%4)\n"			\
-			     "0: lr  %1,%0\n"				\
-			     __op_string "  %1,%3\n"			\
-			     "   cs  %0,%1,0(%4)\n"			\
-			     "   jl  0b"				\
-			     : "=&d" (__old), "=&d" (__new),	       	\
-			       "=m" (*(unsigned long *) __addr)		\
-			     : "d" (__val), "a" (__addr),		\
-			       "m" (*(unsigned long *) __addr) : "cc" );
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+
+#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string)	\
+	asm volatile(						\
+		"	l	%0,%2\n"			\
+		"0:	lr	%1,%0\n"			\
+		__op_string "	%1,%3\n"			\
+		"	cs	%0,%1,%2\n"			\
+		"	jl	0b"				\
+		: "=&d" (__old), "=&d" (__new),			\
+		  "=Q" (*(unsigned long *) __addr)		\
+		: "d" (__val), "Q" (*(unsigned long *) __addr)	\
+		: "cc");
+
+#else /* __GNUC__ */
+
+#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string)	\
+	asm volatile(						\
+		"	l	%0,0(%4)\n"			\
+		"0:	lr	%1,%0\n"			\
+		__op_string "	%1,%3\n"			\
+		"	cs	%0,%1,0(%4)\n"			\
+		"	jl	0b"				\
+		: "=&d" (__old), "=&d" (__new),			\
+		  "=m" (*(unsigned long *) __addr)		\
+		: "d" (__val), "a" (__addr),			\
+		  "m" (*(unsigned long *) __addr) : "cc");
+
+#endif /* __GNUC__ */
 
 #else /* __s390x__ */
 
@@ -86,21 +105,41 @@
 #define __BITOPS_AND		"ngr"
 #define __BITOPS_XOR		"xgr"
 
-#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string)		\
-	__asm__ __volatile__("   lg  %0,0(%4)\n"			\
-			     "0: lgr %1,%0\n"				\
-			     __op_string "  %1,%3\n"			\
-			     "   csg %0,%1,0(%4)\n"			\
-			     "   jl  0b"				\
-			     : "=&d" (__old), "=&d" (__new),	       	\
-			       "=m" (*(unsigned long *) __addr)		\
-			     : "d" (__val), "a" (__addr),		\
-			       "m" (*(unsigned long *) __addr) : "cc" );
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+
+#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string)	\
+	asm volatile(						\
+		"	lg	%0,%2\n"			\
+		"0:	lgr	%1,%0\n"			\
+		__op_string "	%1,%3\n"			\
+		"	csg	%0,%1,%2\n"			\
+		"	jl	0b"				\
+		: "=&d" (__old), "=&d" (__new),			\
+		  "=Q" (*(unsigned long *) __addr)		\
+		: "d" (__val), "Q" (*(unsigned long *) __addr)	\
+		: "cc");
+
+#else /* __GNUC__ */
+
+#define __BITOPS_LOOP(__old, __new, __addr, __val, __op_string)	\
+	asm volatile(						\
+		"	lg	%0,0(%4)\n"			\
+		"0:	lgr	%1,%0\n"			\
+		__op_string "	%1,%3\n"			\
+		"	csg	%0,%1,0(%4)\n"			\
+		"	jl	0b"				\
+		: "=&d" (__old), "=&d" (__new),			\
+		  "=m" (*(unsigned long *) __addr)		\
+		: "d" (__val), "a" (__addr),			\
+		  "m" (*(unsigned long *) __addr) : "cc");
+
+
+#endif /* __GNUC__ */
 
 #endif /* __s390x__ */
 
 #define __BITOPS_WORDS(bits) (((bits)+__BITOPS_WORDSIZE-1)/__BITOPS_WORDSIZE)
-#define __BITOPS_BARRIER() __asm__ __volatile__ ( "" : : : "memory" )
+#define __BITOPS_BARRIER() asm volatile("" : : : "memory")
 
 #ifdef CONFIG_SMP
 /*
@@ -217,10 +256,10 @@
 	unsigned long addr;
 
 	addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
-        asm volatile("oc 0(1,%1),0(%2)"
-		     : "=m" (*(char *) addr)
-		     : "a" (addr), "a" (_oi_bitmap + (nr & 7)),
-		       "m" (*(char *) addr) : "cc" );
+	asm volatile(
+		"	oc	0(1,%1),0(%2)"
+		: "=m" (*(char *) addr) : "a" (addr),
+		  "a" (_oi_bitmap + (nr & 7)), "m" (*(char *) addr) : "cc" );
 }
 
 static inline void 
@@ -229,40 +268,7 @@
 	unsigned long addr;
 
 	addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
-	switch (nr&7) {
-	case 0:
-		asm volatile ("oi 0(%1),0x01" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 1:
-		asm volatile ("oi 0(%1),0x02" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 2:
-		asm volatile ("oi 0(%1),0x04" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 3:
-		asm volatile ("oi 0(%1),0x08" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 4:
-		asm volatile ("oi 0(%1),0x10" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 5:
-		asm volatile ("oi 0(%1),0x20" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 6:
-		asm volatile ("oi 0(%1),0x40" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 7:
-		asm volatile ("oi 0(%1),0x80" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	}
+	*(unsigned char *) addr |= 1 << (nr & 7);
 }
 
 #define set_bit_simple(nr,addr) \
@@ -279,10 +285,10 @@
 	unsigned long addr;
 
 	addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
-        asm volatile("nc 0(1,%1),0(%2)"
-		     : "=m" (*(char *) addr)
-		     : "a" (addr), "a" (_ni_bitmap + (nr & 7)),
-		       "m" (*(char *) addr) : "cc" );
+	asm volatile(
+		"	nc	0(1,%1),0(%2)"
+		: "=m" (*(char *) addr)	: "a" (addr),
+		  "a" (_ni_bitmap + (nr & 7)), "m" (*(char *) addr) : "cc");
 }
 
 static inline void 
@@ -291,40 +297,7 @@
 	unsigned long addr;
 
 	addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
-	switch (nr&7) {
-	case 0:
-		asm volatile ("ni 0(%1),0xFE" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 1:
-		asm volatile ("ni 0(%1),0xFD": "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 2:
-		asm volatile ("ni 0(%1),0xFB" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 3:
-		asm volatile ("ni 0(%1),0xF7" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 4:
-		asm volatile ("ni 0(%1),0xEF" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 5:
-		asm volatile ("ni 0(%1),0xDF" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 6:
-		asm volatile ("ni 0(%1),0xBF" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 7:
-		asm volatile ("ni 0(%1),0x7F" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	}
+	*(unsigned char *) addr &= ~(1 << (nr & 7));
 }
 
 #define clear_bit_simple(nr,addr) \
@@ -340,10 +313,10 @@
 	unsigned long addr;
 
 	addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
-        asm volatile("xc 0(1,%1),0(%2)"
-		     :  "=m" (*(char *) addr)
-		     : "a" (addr), "a" (_oi_bitmap + (nr & 7)),
-		       "m" (*(char *) addr) : "cc" );
+	asm volatile(
+		"	xc	0(1,%1),0(%2)"
+		:  "=m" (*(char *) addr) : "a" (addr),
+		   "a" (_oi_bitmap + (nr & 7)), "m" (*(char *) addr) : "cc" );
 }
 
 static inline void 
@@ -352,40 +325,7 @@
 	unsigned long addr;
 
 	addr = ((unsigned long) ptr) + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
-	switch (nr&7) {
-	case 0:
-		asm volatile ("xi 0(%1),0x01" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 1:
-		asm volatile ("xi 0(%1),0x02" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 2:
-		asm volatile ("xi 0(%1),0x04" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 3:
-		asm volatile ("xi 0(%1),0x08" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 4:
-		asm volatile ("xi 0(%1),0x10" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 5:
-		asm volatile ("xi 0(%1),0x20" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 6:
-		asm volatile ("xi 0(%1),0x40" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	case 7:
-		asm volatile ("xi 0(%1),0x80" : "=m" (*(char *) addr)
-			      : "a" (addr), "m" (*(char *) addr) : "cc" );
-		break;
-	}
+	*(unsigned char *) addr ^= 1 << (nr & 7);
 }
 
 #define change_bit_simple(nr,addr) \
@@ -404,10 +344,11 @@
 
 	addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
 	ch = *(unsigned char *) addr;
-        asm volatile("oc 0(1,%1),0(%2)"
-		     : "=m" (*(char *) addr)
-		     : "a" (addr), "a" (_oi_bitmap + (nr & 7)),
-		       "m" (*(char *) addr) : "cc", "memory" );
+	asm volatile(
+		"	oc	0(1,%1),0(%2)"
+		: "=m" (*(char *) addr)
+		: "a" (addr), "a" (_oi_bitmap + (nr & 7)),
+		  "m" (*(char *) addr) : "cc", "memory");
 	return (ch >> (nr & 7)) & 1;
 }
 #define __test_and_set_bit(X,Y)		test_and_set_bit_simple(X,Y)
@@ -423,10 +364,11 @@
 
 	addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
 	ch = *(unsigned char *) addr;
-        asm volatile("nc 0(1,%1),0(%2)"
-		     : "=m" (*(char *) addr)
-		     : "a" (addr), "a" (_ni_bitmap + (nr & 7)),
-		       "m" (*(char *) addr) : "cc", "memory" );
+	asm volatile(
+		"	nc	0(1,%1),0(%2)"
+		: "=m" (*(char *) addr)
+		: "a" (addr), "a" (_ni_bitmap + (nr & 7)),
+		  "m" (*(char *) addr) : "cc", "memory");
 	return (ch >> (nr & 7)) & 1;
 }
 #define __test_and_clear_bit(X,Y)	test_and_clear_bit_simple(X,Y)
@@ -442,10 +384,11 @@
 
 	addr = (unsigned long) ptr + ((nr ^ (__BITOPS_WORDSIZE - 8)) >> 3);
 	ch = *(unsigned char *) addr;
-        asm volatile("xc 0(1,%1),0(%2)"
-		     : "=m" (*(char *) addr)
-		     : "a" (addr), "a" (_oi_bitmap + (nr & 7)),
-		       "m" (*(char *) addr) : "cc", "memory" );
+	asm volatile(
+		"	xc	0(1,%1),0(%2)"
+		: "=m" (*(char *) addr)
+		: "a" (addr), "a" (_oi_bitmap + (nr & 7)),
+		  "m" (*(char *) addr) : "cc", "memory");
 	return (ch >> (nr & 7)) & 1;
 }
 #define __test_and_change_bit(X,Y)	test_and_change_bit_simple(X,Y)
@@ -557,35 +500,36 @@
 
         if (!size)
                 return 0;
-        __asm__("   lhi  %1,-1\n"
-                "   lr   %2,%3\n"
-                "   slr  %0,%0\n"
-                "   ahi  %2,31\n"
-                "   srl  %2,5\n"
-                "0: c    %1,0(%0,%4)\n"
-                "   jne  1f\n"
-                "   la   %0,4(%0)\n"
-                "   brct %2,0b\n"
-                "   lr   %0,%3\n"
-                "   j    4f\n"
-                "1: l    %2,0(%0,%4)\n"
-                "   sll  %0,3\n"
-                "   lhi  %1,0xff\n"
-                "   tml  %2,0xffff\n"
-                "   jno  2f\n"
-                "   ahi  %0,16\n"
-                "   srl  %2,16\n"
-                "2: tml  %2,0x00ff\n"
-                "   jno  3f\n"
-                "   ahi  %0,8\n"
-                "   srl  %2,8\n"
-                "3: nr   %2,%1\n"
-                "   ic   %2,0(%2,%5)\n"
-                "   alr  %0,%2\n"
-                "4:"
-                : "=&a" (res), "=&d" (cmp), "=&a" (count)
-                : "a" (size), "a" (addr), "a" (&_zb_findmap),
-		  "m" (*(addrtype *) addr) : "cc" );
+	asm volatile(
+		"	lhi	%1,-1\n"
+		"	lr	%2,%3\n"
+		"	slr	%0,%0\n"
+		"	ahi	%2,31\n"
+		"	srl	%2,5\n"
+		"0:	c	%1,0(%0,%4)\n"
+		"	jne	1f\n"
+		"	la	%0,4(%0)\n"
+		"	brct	%2,0b\n"
+		"	lr	%0,%3\n"
+		"	j	4f\n"
+		"1:	l	%2,0(%0,%4)\n"
+		"	sll	%0,3\n"
+		"	lhi	%1,0xff\n"
+		"	tml	%2,0xffff\n"
+		"	jno	2f\n"
+		"	ahi	%0,16\n"
+		"	srl	%2,16\n"
+		"2:	tml	%2,0x00ff\n"
+		"	jno	3f\n"
+		"	ahi	%0,8\n"
+		"	srl	%2,8\n"
+		"3:	nr	%2,%1\n"
+		"	ic	%2,0(%2,%5)\n"
+		"	alr	%0,%2\n"
+		"4:"
+		: "=&a" (res), "=&d" (cmp), "=&a" (count)
+		: "a" (size), "a" (addr), "a" (&_zb_findmap),
+		  "m" (*(addrtype *) addr) : "cc");
         return (res < size) ? res : size;
 }
 
@@ -598,35 +542,36 @@
 
         if (!size)
                 return 0;
-        __asm__("   slr  %1,%1\n"
-                "   lr   %2,%3\n"
-                "   slr  %0,%0\n"
-                "   ahi  %2,31\n"
-                "   srl  %2,5\n"
-                "0: c    %1,0(%0,%4)\n"
-                "   jne  1f\n"
-                "   la   %0,4(%0)\n"
-                "   brct %2,0b\n"
-                "   lr   %0,%3\n"
-                "   j    4f\n"
-                "1: l    %2,0(%0,%4)\n"
-                "   sll  %0,3\n"
-                "   lhi  %1,0xff\n"
-                "   tml  %2,0xffff\n"
-                "   jnz  2f\n"
-                "   ahi  %0,16\n"
-                "   srl  %2,16\n"
-                "2: tml  %2,0x00ff\n"
-                "   jnz  3f\n"
-                "   ahi  %0,8\n"
-                "   srl  %2,8\n"
-                "3: nr   %2,%1\n"
-                "   ic   %2,0(%2,%5)\n"
-                "   alr  %0,%2\n"
-                "4:"
-                : "=&a" (res), "=&d" (cmp), "=&a" (count)
-                : "a" (size), "a" (addr), "a" (&_sb_findmap),
-		  "m" (*(addrtype *) addr) : "cc" );
+	asm volatile(
+		"	slr	%1,%1\n"
+		"	lr	%2,%3\n"
+		"	slr	%0,%0\n"
+		"	ahi	%2,31\n"
+		"	srl	%2,5\n"
+		"0:	c	%1,0(%0,%4)\n"
+		"	jne	1f\n"
+		"	la	%0,4(%0)\n"
+		"	brct	%2,0b\n"
+		"	lr	%0,%3\n"
+		"	j	4f\n"
+		"1:	l	%2,0(%0,%4)\n"
+		"	sll	%0,3\n"
+		"	lhi	%1,0xff\n"
+		"	tml	%2,0xffff\n"
+		"	jnz	2f\n"
+		"	ahi	%0,16\n"
+		"	srl	%2,16\n"
+		"2:	tml	%2,0x00ff\n"
+		"	jnz	3f\n"
+		"	ahi	%0,8\n"
+		"	srl	%2,8\n"
+		"3:	nr	%2,%1\n"
+		"	ic	%2,0(%2,%5)\n"
+		"	alr	%0,%2\n"
+		"4:"
+		: "=&a" (res), "=&d" (cmp), "=&a" (count)
+		: "a" (size), "a" (addr), "a" (&_sb_findmap),
+		  "m" (*(addrtype *) addr) : "cc");
         return (res < size) ? res : size;
 }
 
@@ -640,39 +585,40 @@
 
         if (!size)
                 return 0;
-        __asm__("   lghi  %1,-1\n"
-                "   lgr   %2,%3\n"
-                "   slgr  %0,%0\n"
-                "   aghi  %2,63\n"
-                "   srlg  %2,%2,6\n"
-                "0: cg    %1,0(%0,%4)\n"
-                "   jne   1f\n"
-                "   la    %0,8(%0)\n"
-                "   brct  %2,0b\n"
-                "   lgr   %0,%3\n"
-                "   j     5f\n"
-                "1: lg    %2,0(%0,%4)\n"
-                "   sllg  %0,%0,3\n"
-                "   clr   %2,%1\n"
-		"   jne   2f\n"
-		"   aghi  %0,32\n"
-                "   srlg  %2,%2,32\n"
-		"2: lghi  %1,0xff\n"
-                "   tmll  %2,0xffff\n"
-                "   jno   3f\n"
-                "   aghi  %0,16\n"
-                "   srl   %2,16\n"
-                "3: tmll  %2,0x00ff\n"
-                "   jno   4f\n"
-                "   aghi  %0,8\n"
-                "   srl   %2,8\n"
-                "4: ngr   %2,%1\n"
-                "   ic    %2,0(%2,%5)\n"
-                "   algr  %0,%2\n"
-                "5:"
-                : "=&a" (res), "=&d" (cmp), "=&a" (count)
+	asm volatile(
+		"	lghi	%1,-1\n"
+		"	lgr	%2,%3\n"
+		"	slgr	%0,%0\n"
+		"	aghi	%2,63\n"
+		"	srlg	%2,%2,6\n"
+		"0:	cg	%1,0(%0,%4)\n"
+		"	jne	1f\n"
+		"	la	%0,8(%0)\n"
+		"	brct	%2,0b\n"
+		"	lgr	%0,%3\n"
+		"	j	5f\n"
+		"1:	lg	%2,0(%0,%4)\n"
+		"	sllg	%0,%0,3\n"
+		"	clr	%2,%1\n"
+		"	jne	2f\n"
+		"	aghi	%0,32\n"
+		"	srlg	%2,%2,32\n"
+		"2:	lghi	%1,0xff\n"
+		"	tmll	%2,0xffff\n"
+		"	jno	3f\n"
+		"	aghi	%0,16\n"
+		"	srl	%2,16\n"
+		"3:	tmll	%2,0x00ff\n"
+		"	jno	4f\n"
+		"	aghi	%0,8\n"
+		"	srl	%2,8\n"
+		"4:	ngr	%2,%1\n"
+		"	ic	%2,0(%2,%5)\n"
+		"	algr	%0,%2\n"
+		"5:"
+		: "=&a" (res), "=&d" (cmp), "=&a" (count)
 		: "a" (size), "a" (addr), "a" (&_zb_findmap),
-		  "m" (*(addrtype *) addr) : "cc" );
+		  "m" (*(addrtype *) addr) : "cc");
         return (res < size) ? res : size;
 }
 
@@ -684,39 +630,40 @@
 
         if (!size)
                 return 0;
-        __asm__("   slgr  %1,%1\n"
-                "   lgr   %2,%3\n"
-                "   slgr  %0,%0\n"
-                "   aghi  %2,63\n"
-                "   srlg  %2,%2,6\n"
-                "0: cg    %1,0(%0,%4)\n"
-                "   jne   1f\n"
-                "   aghi  %0,8\n"
-                "   brct  %2,0b\n"
-                "   lgr   %0,%3\n"
-                "   j     5f\n"
-                "1: lg    %2,0(%0,%4)\n"
-                "   sllg  %0,%0,3\n"
-                "   clr   %2,%1\n"
-		"   jne   2f\n"
-		"   aghi  %0,32\n"
-                "   srlg  %2,%2,32\n"
-		"2: lghi  %1,0xff\n"
-                "   tmll  %2,0xffff\n"
-                "   jnz   3f\n"
-                "   aghi  %0,16\n"
-                "   srl   %2,16\n"
-                "3: tmll  %2,0x00ff\n"
-                "   jnz   4f\n"
-                "   aghi  %0,8\n"
-                "   srl   %2,8\n"
-                "4: ngr   %2,%1\n"
-                "   ic    %2,0(%2,%5)\n"
-                "   algr  %0,%2\n"
-                "5:"
-                : "=&a" (res), "=&d" (cmp), "=&a" (count)
+	asm volatile(
+		"	slgr	%1,%1\n"
+		"	lgr	%2,%3\n"
+		"	slgr	%0,%0\n"
+		"	aghi	%2,63\n"
+		"	srlg	%2,%2,6\n"
+		"0:	cg	%1,0(%0,%4)\n"
+		"	jne	1f\n"
+		"	aghi	%0,8\n"
+		"	brct	%2,0b\n"
+		"	lgr	%0,%3\n"
+		"	j	5f\n"
+		"1:	lg	%2,0(%0,%4)\n"
+		"	sllg	%0,%0,3\n"
+		"	clr	%2,%1\n"
+		"	jne	2f\n"
+		"	aghi	%0,32\n"
+		"	srlg	%2,%2,32\n"
+		"2:	lghi	%1,0xff\n"
+		"	tmll	%2,0xffff\n"
+		"	jnz	3f\n"
+		"	aghi	%0,16\n"
+		"	srl	%2,16\n"
+		"3:	tmll	%2,0x00ff\n"
+		"	jnz	4f\n"
+		"	aghi	%0,8\n"
+		"	srl	%2,8\n"
+		"4:	ngr	%2,%1\n"
+		"	ic	%2,0(%2,%5)\n"
+		"	algr	%0,%2\n"
+		"5:"
+		: "=&a" (res), "=&d" (cmp), "=&a" (count)
 		: "a" (size), "a" (addr), "a" (&_sb_findmap),
-		  "m" (*(addrtype *) addr) : "cc" );
+		  "m" (*(addrtype *) addr) : "cc");
         return (res < size) ? res : size;
 }
 
@@ -832,36 +779,37 @@
 
         if (!size)
                 return 0;
-        __asm__("   lhi  %1,-1\n"
-                "   lr   %2,%3\n"
-                "   ahi  %2,31\n"
-                "   srl  %2,5\n"
-                "   slr  %0,%0\n"
-                "0: cl   %1,0(%0,%4)\n"
-                "   jne  1f\n"
-                "   ahi  %0,4\n"
-                "   brct %2,0b\n"
-                "   lr   %0,%3\n"
-                "   j    4f\n"
-                "1: l    %2,0(%0,%4)\n"
-                "   sll  %0,3\n"
-                "   ahi  %0,24\n"
-                "   lhi  %1,0xff\n"
-                "   tmh  %2,0xffff\n"
-                "   jo   2f\n"
-                "   ahi  %0,-16\n"
-                "   srl  %2,16\n"
-                "2: tml  %2,0xff00\n"
-                "   jo   3f\n"
-                "   ahi  %0,-8\n"
-                "   srl  %2,8\n"
-                "3: nr   %2,%1\n"
-                "   ic   %2,0(%2,%5)\n"
-                "   alr  %0,%2\n"
-                "4:"
-                : "=&a" (res), "=&d" (cmp), "=&a" (count)
-                : "a" (size), "a" (vaddr), "a" (&_zb_findmap),
-		  "m" (*(addrtype *) vaddr) : "cc" );
+	asm volatile(
+		"	lhi	%1,-1\n"
+		"	lr	%2,%3\n"
+		"	ahi	%2,31\n"
+		"	srl	%2,5\n"
+		"	slr	%0,%0\n"
+		"0:	cl	%1,0(%0,%4)\n"
+		"	jne	1f\n"
+		"	ahi	%0,4\n"
+		"	brct	%2,0b\n"
+		"	lr	%0,%3\n"
+		"	j	4f\n"
+		"1:	l	%2,0(%0,%4)\n"
+		"	sll	%0,3\n"
+		"	ahi	%0,24\n"
+		"	lhi	%1,0xff\n"
+		"	tmh	%2,0xffff\n"
+		"	jo	2f\n"
+		"	ahi	%0,-16\n"
+		"	srl	%2,16\n"
+		"2:	tml	%2,0xff00\n"
+		"	jo	3f\n"
+		"	ahi	%0,-8\n"
+		"	srl	%2,8\n"
+		"3:	nr	%2,%1\n"
+		"	ic	%2,0(%2,%5)\n"
+		"	alr	%0,%2\n"
+		"4:"
+		: "=&a" (res), "=&d" (cmp), "=&a" (count)
+		: "a" (size), "a" (vaddr), "a" (&_zb_findmap),
+		  "m" (*(addrtype *) vaddr) : "cc");
         return (res < size) ? res : size;
 }
 
@@ -875,39 +823,40 @@
 
         if (!size)
                 return 0;
-        __asm__("   lghi  %1,-1\n"
-                "   lgr   %2,%3\n"
-                "   aghi  %2,63\n"
-                "   srlg  %2,%2,6\n"
-                "   slgr  %0,%0\n"
-                "0: clg   %1,0(%0,%4)\n"
-                "   jne   1f\n"
-                "   aghi  %0,8\n"
-                "   brct  %2,0b\n"
-                "   lgr   %0,%3\n"
-                "   j     5f\n"
-                "1: cl    %1,0(%0,%4)\n"
-		"   jne   2f\n"
-		"   aghi  %0,4\n"
-		"2: l     %2,0(%0,%4)\n"
-                "   sllg  %0,%0,3\n"
-                "   aghi  %0,24\n"
-                "   lghi  %1,0xff\n"
-                "   tmlh  %2,0xffff\n"
-                "   jo    3f\n"
-                "   aghi  %0,-16\n"
-                "   srl   %2,16\n"
-                "3: tmll  %2,0xff00\n"
-                "   jo    4f\n"
-                "   aghi  %0,-8\n"
-                "   srl   %2,8\n"
-                "4: ngr   %2,%1\n"
-                "   ic    %2,0(%2,%5)\n"
-                "   algr  %0,%2\n"
-                "5:"
-                : "=&a" (res), "=&d" (cmp), "=&a" (count)
+	asm volatile(
+		"	lghi	%1,-1\n"
+		"	lgr	%2,%3\n"
+		"	aghi	%2,63\n"
+		"	srlg	%2,%2,6\n"
+		"	slgr	%0,%0\n"
+		"0:	clg	%1,0(%0,%4)\n"
+		"	jne	1f\n"
+		"	aghi	%0,8\n"
+		"	brct	%2,0b\n"
+		"	lgr	%0,%3\n"
+		"	j	5f\n"
+		"1:	cl	%1,0(%0,%4)\n"
+		"	jne	2f\n"
+		"	aghi	%0,4\n"
+		"2:	l	%2,0(%0,%4)\n"
+		"	sllg	%0,%0,3\n"
+		"	aghi	%0,24\n"
+		"	lghi	%1,0xff\n"
+		"	tmlh	%2,0xffff\n"
+		"	jo	3f\n"
+		"	aghi	%0,-16\n"
+		"	srl	%2,16\n"
+		"3:	tmll	%2,0xff00\n"
+		"	jo	4f\n"
+		"	aghi	%0,-8\n"
+		"	srl	%2,8\n"
+		"4:	ngr	%2,%1\n"
+		"	ic	%2,0(%2,%5)\n"
+		"	algr	%0,%2\n"
+		"5:"
+		: "=&a" (res), "=&d" (cmp), "=&a" (count)
 		: "a" (size), "a" (vaddr), "a" (&_zb_findmap),
-		  "m" (*(addrtype *) vaddr) : "cc" );
+		  "m" (*(addrtype *) vaddr) : "cc");
         return (res < size) ? res : size;
 }
 
@@ -927,13 +876,16 @@
 	p = addr + offset / __BITOPS_WORDSIZE;
         if (bit) {
 #ifndef __s390x__
-                asm("   ic   %0,0(%1)\n"
-		    "   icm  %0,2,1(%1)\n"
-		    "   icm  %0,4,2(%1)\n"
-		    "   icm  %0,8,3(%1)"
-		    : "=&a" (word) : "a" (p), "m" (*p) : "cc" );
+		asm volatile(
+			"	ic	%0,0(%1)\n"
+			"	icm	%0,2,1(%1)\n"
+			"	icm	%0,4,2(%1)\n"
+			"	icm	%0,8,3(%1)"
+			: "=&a" (word) : "a" (p), "m" (*p) : "cc");
 #else
-                asm("   lrvg %0,%1" : "=a" (word) : "m" (*p) );
+		asm volatile(
+			"	lrvg	%0,%1"
+			: "=a" (word) : "m" (*p) );
 #endif
 		/*
 		 * s390 version of ffz returns __BITOPS_WORDSIZE
diff --git a/include/asm-s390/byteorder.h b/include/asm-s390/byteorder.h
index 2cc35a0..1fe2492 100644
--- a/include/asm-s390/byteorder.h
+++ b/include/asm-s390/byteorder.h
@@ -14,60 +14,54 @@
 #ifdef __GNUC__
 
 #ifdef __s390x__
-static __inline__ __u64 ___arch__swab64p(const __u64 *x)
+static inline __u64 ___arch__swab64p(const __u64 *x)
 {
 	__u64 result;
 
-	__asm__ __volatile__ (
-		"   lrvg %0,%1"
-		: "=d" (result) : "m" (*x) );
+	asm volatile("lrvg %0,%1" : "=d" (result) : "m" (*x));
 	return result;
 }
 
-static __inline__ __u64 ___arch__swab64(__u64 x)
+static inline __u64 ___arch__swab64(__u64 x)
 {
 	__u64 result;
 
-	__asm__ __volatile__ (
-		"   lrvgr %0,%1"
-		: "=d" (result) : "d" (x) );
+	asm volatile("lrvgr %0,%1" : "=d" (result) : "d" (x));
 	return result;
 }
 
-static __inline__ void ___arch__swab64s(__u64 *x)
+static inline void ___arch__swab64s(__u64 *x)
 {
 	*x = ___arch__swab64p(x);
 }
 #endif /* __s390x__ */
 
-static __inline__ __u32 ___arch__swab32p(const __u32 *x)
+static inline __u32 ___arch__swab32p(const __u32 *x)
 {
 	__u32 result;
 	
-	__asm__ __volatile__ (
+	asm volatile(
 #ifndef __s390x__
-		"        icm   %0,8,3(%1)\n"
-		"        icm   %0,4,2(%1)\n"
-		"        icm   %0,2,1(%1)\n"
-		"        ic    %0,0(%1)"
-		: "=&d" (result) : "a" (x), "m" (*x) : "cc" );
+		"	icm	%0,8,3(%1)\n"
+		"	icm	%0,4,2(%1)\n"
+		"	icm	%0,2,1(%1)\n"
+		"	ic	%0,0(%1)"
+		: "=&d" (result) : "a" (x), "m" (*x) : "cc");
 #else /* __s390x__ */
-		"   lrv  %0,%1"
-		: "=d" (result) : "m" (*x) );
+		"	lrv	%0,%1"
+		: "=d" (result) : "m" (*x));
 #endif /* __s390x__ */
 	return result;
 }
 
-static __inline__ __u32 ___arch__swab32(__u32 x)
+static inline __u32 ___arch__swab32(__u32 x)
 {
 #ifndef __s390x__
 	return ___arch__swab32p(&x);
 #else /* __s390x__ */
 	__u32 result;
 	
-	__asm__ __volatile__ (
-		"   lrvr  %0,%1"
-		: "=d" (result) : "d" (x) );
+	asm volatile("lrvr  %0,%1" : "=d" (result) : "d" (x));
 	return result;
 #endif /* __s390x__ */
 }
@@ -81,14 +75,14 @@
 {
 	__u16 result;
 	
-	__asm__ __volatile__ (
+	asm volatile(
 #ifndef __s390x__
-		"        icm   %0,2,1(%1)\n"
-		"        ic    %0,0(%1)\n"
-		: "=&d" (result) : "a" (x), "m" (*x) : "cc" );
+		"	icm	%0,2,1(%1)\n"
+		"	ic	%0,0(%1)\n"
+		: "=&d" (result) : "a" (x), "m" (*x) : "cc");
 #else /* __s390x__ */
-		"   lrvh %0,%1"
-		: "=d" (result) : "m" (*x) );
+		"	lrvh	%0,%1"
+		: "=d" (result) : "m" (*x));
 #endif /* __s390x__ */
 	return result;
 }
diff --git a/include/asm-s390/checksum.h b/include/asm-s390/checksum.h
index 471f2af..37c362d 100644
--- a/include/asm-s390/checksum.h
+++ b/include/asm-s390/checksum.h
@@ -30,57 +30,13 @@
 static inline unsigned int
 csum_partial(const unsigned char * buff, int len, unsigned int sum)
 {
-	/*
-	 * Experiments with ethernet and slip connections show that buf
-	 * is aligned on either a 2-byte or 4-byte boundary.
-	 */
-#ifndef __s390x__
-	register_pair rp;
+	register unsigned long reg2 asm("2") = (unsigned long) buff;
+	register unsigned long reg3 asm("3") = (unsigned long) len;
 
-	rp.subreg.even = (unsigned long) buff;
-	rp.subreg.odd = (unsigned long) len;
-	__asm__ __volatile__ (
-		"0:  cksm %0,%1\n"	/* do checksum on longs */
-		"    jo   0b\n"
-		: "+&d" (sum), "+&a" (rp) : : "cc", "memory" );
-#else /* __s390x__ */
-        __asm__ __volatile__ (
-                "    lgr  2,%1\n"    /* address in gpr 2 */
-                "    lgfr 3,%2\n"    /* length in gpr 3 */
-                "0:  cksm %0,2\n"    /* do checksum on longs */
-                "    jo   0b\n"
-                : "+&d" (sum)
-                : "d" (buff), "d" (len)
-                : "cc", "memory", "2", "3" );
-#endif /* __s390x__ */
-	return sum;
-}
-
-/*
- * csum_partial as an inline function
- */
-static inline unsigned int 
-csum_partial_inline(const unsigned char * buff, int len, unsigned int sum)
-{
-#ifndef __s390x__
-	register_pair rp;
-
-	rp.subreg.even = (unsigned long) buff;
-	rp.subreg.odd = (unsigned long) len;
-	__asm__ __volatile__ (
-		"0:  cksm %0,%1\n"    /* do checksum on longs */
-		"    jo   0b\n"
-                : "+&d" (sum), "+&a" (rp) : : "cc", "memory" );
-#else /* __s390x__ */
-	__asm__ __volatile__ (
-		"    lgr  2,%1\n"    /* address in gpr 2 */
-		"    lgfr 3,%2\n"    /* length in gpr 3 */
-		"0:  cksm %0,2\n"    /* do checksum on longs */
-		"    jo   0b\n"
-                : "+&d" (sum)
-		: "d" (buff), "d" (len)
-                : "cc", "memory", "2", "3" );
-#endif /* __s390x__ */
+	asm volatile(
+		"0:	cksm	%0,%1\n"	/* do checksum on longs */
+		"	jo	0b\n"
+		: "+d" (sum), "+d" (reg2), "+d" (reg3) : : "cc", "memory");
 	return sum;
 }
 
@@ -114,7 +70,7 @@
 csum_partial_copy_nocheck (const char *src, char *dst, int len, unsigned int sum)
 {
         memcpy(dst,src,len);
-        return csum_partial_inline(dst, len, sum);
+	return csum_partial(dst, len, sum);
 }
 
 /*
@@ -126,22 +82,22 @@
 #ifndef __s390x__
 	register_pair rp;
 
-	__asm__ __volatile__ (
-		"    slr  %N1,%N1\n" /* %0 = H L */
-		"    lr   %1,%0\n"   /* %0 = H L, %1 = H L 0 0 */
-		"    srdl %1,16\n"   /* %0 = H L, %1 = 0 H L 0 */
-		"    alr  %1,%N1\n"  /* %0 = H L, %1 = L H L 0 */
-		"    alr  %0,%1\n"   /* %0 = H+L+C L+H */
-		"    srl  %0,16\n"   /* %0 = H+L+C */
-		: "+&d" (sum), "=d" (rp) : : "cc" );
+	asm volatile(
+		"	slr	%N1,%N1\n"	/* %0 = H L */
+		"	lr	%1,%0\n"	/* %0 = H L, %1 = H L 0 0 */
+		"	srdl	%1,16\n"	/* %0 = H L, %1 = 0 H L 0 */
+		"	alr	%1,%N1\n"	/* %0 = H L, %1 = L H L 0 */
+		"	alr	%0,%1\n"	/* %0 = H+L+C L+H */
+		"	srl	%0,16\n"	/* %0 = H+L+C */
+		: "+&d" (sum), "=d" (rp) : : "cc");
 #else /* __s390x__ */
-	__asm__ __volatile__ (
-		"    sr   3,3\n"   /* %0 = H*65536 + L */
-		"    lr   2,%0\n"  /* %0 = H L, R2/R3 = H L / 0 0 */
-		"    srdl 2,16\n"  /* %0 = H L, R2/R3 = 0 H / L 0 */
-		"    alr  2,3\n"   /* %0 = H L, R2/R3 = L H / L 0 */
-		"    alr  %0,2\n"  /* %0 = H+L+C L+H */
-                "    srl  %0,16\n" /* %0 = H+L+C */
+	asm volatile(
+		"	sr	3,3\n"		/* %0 = H*65536 + L */
+		"	lr	2,%0\n"		/* %0 = H L, 2/3 = H L / 0 0 */
+		"	srdl	2,16\n"		/* %0 = H L, 2/3 = 0 H / L 0 */
+		"	alr	2,3\n"		/* %0 = H L, 2/3 = L H / L 0 */
+		"	alr	%0,2\n"		/* %0 = H+L+C L+H */
+		"	srl	%0,16\n"	/* %0 = H+L+C */
 		: "+&d" (sum) : : "cc", "2", "3");
 #endif /* __s390x__ */
 	return ((unsigned short) ~sum);
@@ -155,29 +111,7 @@
 static inline unsigned short
 ip_fast_csum(unsigned char *iph, unsigned int ihl)
 {
-	unsigned long sum;
-#ifndef __s390x__
-	register_pair rp;
-
-	rp.subreg.even = (unsigned long) iph;
-	rp.subreg.odd = (unsigned long) ihl*4;
-        __asm__ __volatile__ (
-		"    sr   %0,%0\n"   /* set sum to zero */
-                "0:  cksm %0,%1\n"   /* do checksum on longs */
-                "    jo   0b\n"
-                : "=&d" (sum), "+&a" (rp) : : "cc", "memory" );
-#else /* __s390x__ */
-        __asm__ __volatile__ (
-		"    slgr %0,%0\n"   /* set sum to zero */
-                "    lgr  2,%1\n"    /* address in gpr 2 */
-                "    lgfr 3,%2\n"    /* length in gpr 3 */
-                "0:  cksm %0,2\n"    /* do checksum on ints */
-                "    jo   0b\n"
-                : "=&d" (sum)
-                : "d" (iph), "d" (ihl*4)
-                : "cc", "memory", "2", "3" );
-#endif /* __s390x__ */
-        return csum_fold(sum);
+	return csum_fold(csum_partial(iph, ihl*4, 0));
 }
 
 /*
@@ -190,47 +124,47 @@
                    unsigned int sum)
 {
 #ifndef __s390x__
-	__asm__ __volatile__ (
-                "    alr   %0,%1\n"  /* sum += saddr */
-                "    brc   12,0f\n"
-		"    ahi   %0,1\n"   /* add carry */
+	asm volatile(
+		"	alr	%0,%1\n" /* sum += saddr */
+		"	brc	12,0f\n"
+		"	ahi	%0,1\n"  /* add carry */
 		"0:"
-		: "+&d" (sum) : "d" (saddr) : "cc" );
-	__asm__ __volatile__ (
-                "    alr   %0,%1\n"  /* sum += daddr */
-                "    brc   12,1f\n"
-                "    ahi   %0,1\n"   /* add carry */
+		: "+&d" (sum) : "d" (saddr) : "cc");
+	asm volatile(
+		"	alr	%0,%1\n" /* sum += daddr */
+		"	brc	12,1f\n"
+		"	ahi	%0,1\n"  /* add carry */
 		"1:"
-		: "+&d" (sum) : "d" (daddr) : "cc" );
-	__asm__ __volatile__ (
-                "    alr   %0,%1\n"  /* sum += (len<<16) + (proto<<8) */
-		"    brc   12,2f\n"
-		"    ahi   %0,1\n"   /* add carry */
+		: "+&d" (sum) : "d" (daddr) : "cc");
+	asm volatile(
+		"	alr	%0,%1\n" /* sum += (len<<16) + (proto<<8) */
+		"	brc	12,2f\n"
+		"	ahi	%0,1\n"  /* add carry */
 		"2:"
 		: "+&d" (sum)
 		: "d" (((unsigned int) len<<16) + (unsigned int) proto)
-		: "cc" );
+		: "cc");
 #else /* __s390x__ */
-	__asm__ __volatile__ (
-                "    lgfr  %0,%0\n"
-                "    algr  %0,%1\n"  /* sum += saddr */
-                "    brc   12,0f\n"
-		"    aghi  %0,1\n"   /* add carry */
-		"0:  algr  %0,%2\n"  /* sum += daddr */
-                "    brc   12,1f\n"
-                "    aghi  %0,1\n"   /* add carry */
-		"1:  algfr %0,%3\n"  /* sum += (len<<16) + proto */
-		"    brc   12,2f\n"
-		"    aghi  %0,1\n"   /* add carry */
-		"2:  srlg  0,%0,32\n"
-                "    alr   %0,0\n"   /* fold to 32 bits */
-                "    brc   12,3f\n"
-                "    ahi   %0,1\n"   /* add carry */
-                "3:  llgfr %0,%0"
+	asm volatile(
+		"	lgfr	%0,%0\n"
+		"	algr	%0,%1\n"  /* sum += saddr */
+		"	brc	12,0f\n"
+		"	aghi	%0,1\n"   /* add carry */
+		"0:	algr	%0,%2\n"  /* sum += daddr */
+		"	brc	12,1f\n"
+		"	aghi	%0,1\n"   /* add carry */
+		"1:	algfr	%0,%3\n"  /* sum += (len<<16) + proto */
+		"	brc	12,2f\n"
+		"	aghi	%0,1\n"   /* add carry */
+		"2:	srlg	0,%0,32\n"
+		"	alr	%0,0\n"   /* fold to 32 bits */
+		"	brc	12,3f\n"
+		"	ahi	%0,1\n"   /* add carry */
+		"3:	llgfr	%0,%0"
 		: "+&d" (sum)
 		: "d" (saddr), "d" (daddr),
 		  "d" (((unsigned int) len<<16) + (unsigned int) proto)
-		: "cc", "0" );
+		: "cc", "0");
 #endif /* __s390x__ */
 	return sum;
 }
diff --git a/include/asm-s390/ebcdic.h b/include/asm-s390/ebcdic.h
index 15fd2ed..7f6f641 100644
--- a/include/asm-s390/ebcdic.h
+++ b/include/asm-s390/ebcdic.h
@@ -26,16 +26,16 @@
 {
 	if (nr-- <= 0)
 		return;
-        __asm__ __volatile__(
-		"   bras 1,1f\n"
-		"   tr   0(1,%0),0(%2)\n"
-                "0: tr   0(256,%0),0(%2)\n"
-		"   la   %0,256(%0)\n"
-		"1: ahi  %1,-256\n"
-		"   jnm  0b\n"
-		"   ex   %1,0(1)"
-                : "+&a" (addr), "+&a" (nr)
-                : "a" (codepage) : "cc", "memory", "1" );
+	asm volatile(
+		"	bras	1,1f\n"
+		"	tr	0(1,%0),0(%2)\n"
+		"0:	tr	0(256,%0),0(%2)\n"
+		"	la	%0,256(%0)\n"
+		"1:	ahi	%1,-256\n"
+		"	jnm	0b\n"
+		"	ex	%1,0(1)"
+		: "+&a" (addr), "+&a" (nr)
+		: "a" (codepage) : "cc", "memory", "1");
 }
 
 #define ASCEBC(addr,nr) codepage_convert(_ascebc, addr, nr)
diff --git a/include/asm-s390/io.h b/include/asm-s390/io.h
index a6cc27e..63c78b9 100644
--- a/include/asm-s390/io.h
+++ b/include/asm-s390/io.h
@@ -27,18 +27,16 @@
 static inline unsigned long virt_to_phys(volatile void * address)
 {
 	unsigned long real_address;
-	__asm__ (
+	asm volatile(
 #ifndef __s390x__
-		 "   lra    %0,0(%1)\n"
-                 "   jz     0f\n"
-                 "   sr     %0,%0\n"
+		 "	lra	%0,0(%1)\n"
 #else /* __s390x__ */
-		 "   lrag   %0,0(%1)\n"
-                 "   jz     0f\n"
-                 "   slgr   %0,%0\n"
+		 "	lrag	%0,0(%1)\n"
 #endif /* __s390x__ */
+		 "	jz	0f\n"
+		 "	la	%0,0\n"
                  "0:"
-                 : "=a" (real_address) : "a" (address) : "cc" );
+		 : "=a" (real_address) : "a" (address) : "cc");
         return real_address;
 }
 
diff --git a/include/asm-s390/irqflags.h b/include/asm-s390/irqflags.h
index 3b566a5..3f26131 100644
--- a/include/asm-s390/irqflags.h
+++ b/include/asm-s390/irqflags.h
@@ -10,43 +10,93 @@
 
 #ifdef __KERNEL__
 
-/* interrupt control.. */
-#define raw_local_irq_enable() ({ \
-	unsigned long  __dummy; \
-	__asm__ __volatile__ ( \
-		"stosm 0(%1),0x03" \
-		: "=m" (__dummy) : "a" (&__dummy) : "memory" ); \
-	})
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
 
-#define raw_local_irq_disable() ({ \
-	unsigned long __flags; \
-	__asm__ __volatile__ ( \
-		"stnsm 0(%1),0xfc" : "=m" (__flags) : "a" (&__flags) ); \
-	__flags; \
-	})
-
-#define raw_local_save_flags(x)							\
-do {										\
-	typecheck(unsigned long, x);						\
-	__asm__ __volatile__("stosm 0(%1),0" : "=m" (x) : "a" (&x), "m" (x) );	\
-} while (0)
-
-#define raw_local_irq_restore(x)						\
-do {										\
-	typecheck(unsigned long, x);						\
-	__asm__ __volatile__("ssm   0(%0)" : : "a" (&x), "m" (x) : "memory");	\
-} while (0)
-
-#define raw_irqs_disabled()		\
-({					\
-	unsigned long flags;		\
-	raw_local_save_flags(flags);	\
-	!((flags >> __FLAG_SHIFT) & 3);	\
+/* store then or system mask. */
+#define __raw_local_irq_stosm(__or)					\
+({									\
+	unsigned long __mask;						\
+	asm volatile(							\
+		"	stosm	%0,%1"					\
+		: "=Q" (__mask) : "i" (__or) : "memory");		\
+	__mask;								\
 })
 
+/* store then and system mask. */
+#define __raw_local_irq_stnsm(__and)					\
+({									\
+	unsigned long __mask;						\
+	asm volatile(							\
+		"	stnsm	%0,%1"					\
+		: "=Q" (__mask) : "i" (__and) : "memory");		\
+	__mask;								\
+})
+
+/* set system mask. */
+#define __raw_local_irq_ssm(__mask)					\
+({									\
+	asm volatile("ssm   %0" : : "Q" (__mask) : "memory");		\
+})
+
+#else /* __GNUC__ */
+
+/* store then or system mask. */
+#define __raw_local_irq_stosm(__or)					\
+({									\
+	unsigned long __mask;						\
+	asm volatile(							\
+		"	stosm	0(%1),%2"				\
+		: "=m" (__mask)						\
+		: "a" (&__mask), "i" (__or) : "memory");		\
+	__mask;								\
+})
+
+/* store then and system mask. */
+#define __raw_local_irq_stnsm(__and)					\
+({									\
+	unsigned long __mask;						\
+	asm volatile(							\
+		"	stnsm	0(%1),%2"				\
+		: "=m" (__mask)						\
+		: "a" (&__mask), "i" (__and) : "memory");		\
+	__mask;								\
+})
+
+/* set system mask. */
+#define __raw_local_irq_ssm(__mask)					\
+({									\
+	asm volatile(							\
+		"	ssm	0(%0)"					\
+		: : "a" (&__mask), "m" (__mask) : "memory");		\
+})
+
+#endif /* __GNUC__ */
+
+/* interrupt control.. */
+static inline unsigned long raw_local_irq_enable(void)
+{
+	return __raw_local_irq_stosm(0x03);
+}
+
+static inline unsigned long raw_local_irq_disable(void)
+{
+	return __raw_local_irq_stnsm(0xfc);
+}
+
+#define raw_local_save_flags(x)						\
+do {									\
+	typecheck(unsigned long, x);					\
+	(x) = __raw_local_irq_stosm(0x00);				\
+} while (0)
+
+static inline void raw_local_irq_restore(unsigned long flags)
+{
+	__raw_local_irq_ssm(flags);
+}
+
 static inline int raw_irqs_disabled_flags(unsigned long flags)
 {
-	return !((flags >> __FLAG_SHIFT) & 3);
+	return !(flags & (3UL << (BITS_PER_LONG - 8)));
 }
 
 /* For spinlocks etc */
diff --git a/include/asm-s390/lowcore.h b/include/asm-s390/lowcore.h
index 18695d1..06583ed 100644
--- a/include/asm-s390/lowcore.h
+++ b/include/asm-s390/lowcore.h
@@ -359,7 +359,7 @@
 
 static inline void set_prefix(__u32 address)
 {
-        __asm__ __volatile__ ("spx %0" : : "m" (address) : "memory" );
+	asm volatile("spx %0" : : "m" (address) : "memory");
 }
 
 #define __PANIC_MAGIC           0xDEADC0DE
diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h
index b2628dc..796c400 100644
--- a/include/asm-s390/page.h
+++ b/include/asm-s390/page.h
@@ -22,89 +22,45 @@
 #include <asm/setup.h>
 #ifndef __ASSEMBLY__
 
-#ifndef __s390x__
-
 static inline void clear_page(void *page)
 {
-	register_pair rp;
-
-	rp.subreg.even = (unsigned long) page;
-	rp.subreg.odd = (unsigned long) 4096;
-        asm volatile ("   slr  1,1\n"
-		      "   mvcl %0,0"
-		      : "+&a" (rp) : : "memory", "cc", "1" );
+	register unsigned long reg1 asm ("1") = 0;
+	register void *reg2 asm ("2") = page;
+	register unsigned long reg3 asm ("3") = 4096;
+	asm volatile(
+		"	mvcl	2,0"
+		: "+d" (reg2), "+d" (reg3) : "d" (reg1) : "memory", "cc");
 }
 
 static inline void copy_page(void *to, void *from)
 {
-        if (MACHINE_HAS_MVPG)
-		asm volatile ("   sr   0,0\n"
-			      "   mvpg %0,%1"
-			      : : "a" ((void *)(to)), "a" ((void *)(from))
-			      : "memory", "cc", "0" );
-	else
-		asm volatile ("   mvc  0(256,%0),0(%1)\n"
-			      "   mvc  256(256,%0),256(%1)\n"
-			      "   mvc  512(256,%0),512(%1)\n"
-			      "   mvc  768(256,%0),768(%1)\n"
-			      "   mvc  1024(256,%0),1024(%1)\n"
-			      "   mvc  1280(256,%0),1280(%1)\n"
-			      "   mvc  1536(256,%0),1536(%1)\n"
-			      "   mvc  1792(256,%0),1792(%1)\n"
-			      "   mvc  2048(256,%0),2048(%1)\n"
-			      "   mvc  2304(256,%0),2304(%1)\n"
-			      "   mvc  2560(256,%0),2560(%1)\n"
-			      "   mvc  2816(256,%0),2816(%1)\n"
-			      "   mvc  3072(256,%0),3072(%1)\n"
-			      "   mvc  3328(256,%0),3328(%1)\n"
-			      "   mvc  3584(256,%0),3584(%1)\n"
-			      "   mvc  3840(256,%0),3840(%1)\n"
-			      : : "a"((void *)(to)),"a"((void *)(from)) 
-			      : "memory" );
+	if (MACHINE_HAS_MVPG) {
+		register unsigned long reg0 asm ("0") = 0;
+		asm volatile(
+			"	mvpg	%0,%1"
+			: : "a" (to), "a" (from), "d" (reg0)
+			: "memory", "cc");
+	} else
+		asm volatile(
+			"	mvc	0(256,%0),0(%1)\n"
+			"	mvc	256(256,%0),256(%1)\n"
+			"	mvc	512(256,%0),512(%1)\n"
+			"	mvc	768(256,%0),768(%1)\n"
+			"	mvc	1024(256,%0),1024(%1)\n"
+			"	mvc	1280(256,%0),1280(%1)\n"
+			"	mvc	1536(256,%0),1536(%1)\n"
+			"	mvc	1792(256,%0),1792(%1)\n"
+			"	mvc	2048(256,%0),2048(%1)\n"
+			"	mvc	2304(256,%0),2304(%1)\n"
+			"	mvc	2560(256,%0),2560(%1)\n"
+			"	mvc	2816(256,%0),2816(%1)\n"
+			"	mvc	3072(256,%0),3072(%1)\n"
+			"	mvc	3328(256,%0),3328(%1)\n"
+			"	mvc	3584(256,%0),3584(%1)\n"
+			"	mvc	3840(256,%0),3840(%1)\n"
+			: : "a" (to), "a" (from) : "memory");
 }
 
-#else /* __s390x__ */
-
-static inline void clear_page(void *page)
-{
-        asm volatile ("   lgr  2,%0\n"
-                      "   lghi 3,4096\n"
-                      "   slgr 1,1\n"
-                      "   mvcl 2,0"
-                      : : "a" ((void *) (page))
-		      : "memory", "cc", "1", "2", "3" );
-}
-
-static inline void copy_page(void *to, void *from)
-{
-        if (MACHINE_HAS_MVPG)
-		asm volatile ("   sgr  0,0\n"
-			      "   mvpg %0,%1"
-			      : : "a" ((void *)(to)), "a" ((void *)(from))
-			      : "memory", "cc", "0" );
-	else
-		asm volatile ("   mvc  0(256,%0),0(%1)\n"
-			      "   mvc  256(256,%0),256(%1)\n"
-			      "   mvc  512(256,%0),512(%1)\n"
-			      "   mvc  768(256,%0),768(%1)\n"
-			      "   mvc  1024(256,%0),1024(%1)\n"
-			      "   mvc  1280(256,%0),1280(%1)\n"
-			      "   mvc  1536(256,%0),1536(%1)\n"
-			      "   mvc  1792(256,%0),1792(%1)\n"
-			      "   mvc  2048(256,%0),2048(%1)\n"
-			      "   mvc  2304(256,%0),2304(%1)\n"
-			      "   mvc  2560(256,%0),2560(%1)\n"
-			      "   mvc  2816(256,%0),2816(%1)\n"
-			      "   mvc  3072(256,%0),3072(%1)\n"
-			      "   mvc  3328(256,%0),3328(%1)\n"
-			      "   mvc  3584(256,%0),3584(%1)\n"
-			      "   mvc  3840(256,%0),3840(%1)\n"
-			      : : "a"((void *)(to)),"a"((void *)(from)) 
-			      : "memory" );
-}
-
-#endif /* __s390x__ */
-
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
@@ -159,7 +115,7 @@
 static inline void
 page_set_storage_key(unsigned long addr, unsigned int skey)
 {
-	asm volatile ( "sske %0,%1" : : "d" (skey), "a" (addr) );
+	asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
 }
 
 static inline unsigned int
@@ -167,8 +123,7 @@
 {
 	unsigned int skey;
 
-	asm volatile ( "iske %0,%1" : "=d" (skey) : "a" (addr), "0" (0) );
-
+	asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr), "0" (0));
 	return skey;
 }
 
diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h
index e965309..83425cd 100644
--- a/include/asm-s390/pgtable.h
+++ b/include/asm-s390/pgtable.h
@@ -554,9 +554,10 @@
 		/* ipte in zarch mode can do the math */
 		pte_t *pto = ptep;
 #endif
-		asm volatile ("ipte %2,%3"
-			      : "=m" (*ptep) : "m" (*ptep),
-				"a" (pto), "a" (address) );
+		asm volatile(
+			"	ipte	%2,%3"
+			: "=m" (*ptep) : "m" (*ptep),
+			  "a" (pto), "a" (address));
 	}
 	pte_val(*ptep) = _PAGE_TYPE_EMPTY;
 }
@@ -609,16 +610,17 @@
 /*
  * Test and clear referenced bit in storage key.
  */
-#define page_test_and_clear_young(page)					  \
-({									  \
-	struct page *__page = (page);					  \
-	unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT);  \
-	int __ccode;							  \
-	asm volatile ("rrbe 0,%1\n\t"					  \
-		      "ipm  %0\n\t"					  \
-		      "srl  %0,28\n\t" 					  \
-                      : "=d" (__ccode) : "a" (__physpage) : "cc" );	  \
-	(__ccode & 2);							  \
+#define page_test_and_clear_young(page)					\
+({									\
+	struct page *__page = (page);					\
+	unsigned long __physpage = __pa((__page-mem_map) << PAGE_SHIFT);\
+	int __ccode;							\
+	asm volatile(							\
+		"	rrbe	0,%1\n"					\
+		"	ipm	%0\n"					\
+		"	srl	%0,28\n"				\
+		: "=d" (__ccode) : "a" (__physpage) : "cc");		\
+	(__ccode & 2);							\
 })
 
 /*
diff --git a/include/asm-s390/processor.h b/include/asm-s390/processor.h
index 578c220..cbbedc6 100644
--- a/include/asm-s390/processor.h
+++ b/include/asm-s390/processor.h
@@ -13,7 +13,6 @@
 #ifndef __ASM_S390_PROCESSOR_H
 #define __ASM_S390_PROCESSOR_H
 
-#include <asm/page.h>
 #include <asm/ptrace.h>
 
 #ifdef __KERNEL__
@@ -21,7 +20,7 @@
  * Default implementation of macro that returns current
  * instruction pointer ("program counter").
  */
-#define current_text_addr() ({ void *pc; __asm__("basr %0,0":"=a"(pc)); pc; })
+#define current_text_addr() ({ void *pc; asm("basr %0,0" : "=a" (pc)); pc; })
 
 /*
  *  CPU type and hardware bug flags. Kept separately for each CPU.
@@ -202,7 +201,7 @@
 static inline void cpu_relax(void)
 {
 	if (MACHINE_HAS_DIAG44)
-		asm volatile ("diag 0,0,68" : : : "memory");
+		asm volatile("diag 0,0,68" : : : "memory");
 	else
 		barrier();
 }
@@ -213,9 +212,9 @@
 static inline void __load_psw(psw_t psw)
 {
 #ifndef __s390x__
-	asm volatile ("lpsw  0(%0)" : : "a" (&psw), "m" (psw) : "cc" );
+	asm volatile("lpsw  0(%0)" : : "a" (&psw), "m" (psw) : "cc");
 #else
-	asm volatile ("lpswe 0(%0)" : : "a" (&psw), "m" (psw) : "cc" );
+	asm volatile("lpswe 0(%0)" : : "a" (&psw), "m" (psw) : "cc");
 #endif
 }
 
@@ -232,20 +231,20 @@
 	psw.mask = mask;
 
 #ifndef __s390x__
-	asm volatile (
-		"    basr %0,0\n"
-		"0:  ahi  %0,1f-0b\n"
-		"    st	  %0,4(%1)\n"
-		"    lpsw 0(%1)\n"
+	asm volatile(
+		"	basr	%0,0\n"
+		"0:	ahi	%0,1f-0b\n"
+		"	st	%0,4(%1)\n"
+		"	lpsw	0(%1)\n"
 		"1:"
-		: "=&d" (addr) : "a" (&psw), "m" (psw) : "memory", "cc" );
+		: "=&d" (addr) : "a" (&psw), "m" (psw) : "memory", "cc");
 #else /* __s390x__ */
-	asm volatile (
-		"    larl  %0,1f\n"
-		"    stg   %0,8(%1)\n"
-		"    lpswe 0(%1)\n"
+	asm volatile(
+		"	larl	%0,1f\n"
+		"	stg	%0,8(%1)\n"
+		"	lpswe	0(%1)\n"
 		"1:"
-		: "=&d" (addr) : "a" (&psw), "m" (psw) : "memory", "cc" );
+		: "=&d" (addr) : "a" (&psw), "m" (psw) : "memory", "cc");
 #endif /* __s390x__ */
 }
  
@@ -274,56 +273,57 @@
          * the processor is dead afterwards
          */
 #ifndef __s390x__
-        asm volatile ("    stctl 0,0,0(%2)\n"
-                      "    ni    0(%2),0xef\n" /* switch off protection */
-                      "    lctl  0,0,0(%2)\n"
-                      "    stpt  0xd8\n"       /* store timer */
-                      "    stckc 0xe0\n"       /* store clock comparator */
-                      "    stpx  0x108\n"      /* store prefix register */
-                      "    stam  0,15,0x120\n" /* store access registers */
-                      "    std   0,0x160\n"    /* store f0 */
-                      "    std   2,0x168\n"    /* store f2 */
-                      "    std   4,0x170\n"    /* store f4 */
-                      "    std   6,0x178\n"    /* store f6 */
-                      "    stm   0,15,0x180\n" /* store general registers */
-                      "    stctl 0,15,0x1c0\n" /* store control registers */
-                      "    oi    0x1c0,0x10\n" /* fake protection bit */
-                      "    lpsw 0(%1)"
-                      : "=m" (ctl_buf)
-		      : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc" );
+	asm volatile(
+		"	stctl	0,0,0(%2)\n"
+		"	ni	0(%2),0xef\n"	/* switch off protection */
+		"	lctl	0,0,0(%2)\n"
+		"	stpt	0xd8\n"		/* store timer */
+		"	stckc	0xe0\n"		/* store clock comparator */
+		"	stpx	0x108\n"	/* store prefix register */
+		"	stam	0,15,0x120\n"	/* store access registers */
+		"	std	0,0x160\n"	/* store f0 */
+		"	std	2,0x168\n"	/* store f2 */
+		"	std	4,0x170\n"	/* store f4 */
+		"	std	6,0x178\n"	/* store f6 */
+		"	stm	0,15,0x180\n"	/* store general registers */
+		"	stctl	0,15,0x1c0\n"	/* store control registers */
+		"	oi	0x1c0,0x10\n"	/* fake protection bit */
+		"	lpsw	0(%1)"
+		: "=m" (ctl_buf)
+		: "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw) : "cc");
 #else /* __s390x__ */
-        asm volatile ("    stctg 0,0,0(%2)\n"
-                      "    ni    4(%2),0xef\n" /* switch off protection */
-                      "    lctlg 0,0,0(%2)\n"
-                      "    lghi  1,0x1000\n"
-                      "    stpt  0x328(1)\n"      /* store timer */
-                      "    stckc 0x330(1)\n"      /* store clock comparator */
-                      "    stpx  0x318(1)\n"      /* store prefix register */
-                      "    stam  0,15,0x340(1)\n" /* store access registers */
-                      "    stfpc 0x31c(1)\n"      /* store fpu control */
-                      "    std   0,0x200(1)\n"    /* store f0 */
-                      "    std   1,0x208(1)\n"    /* store f1 */
-                      "    std   2,0x210(1)\n"    /* store f2 */
-                      "    std   3,0x218(1)\n"    /* store f3 */
-                      "    std   4,0x220(1)\n"    /* store f4 */
-                      "    std   5,0x228(1)\n"    /* store f5 */
-                      "    std   6,0x230(1)\n"    /* store f6 */
-                      "    std   7,0x238(1)\n"    /* store f7 */
-                      "    std   8,0x240(1)\n"    /* store f8 */
-                      "    std   9,0x248(1)\n"    /* store f9 */
-                      "    std   10,0x250(1)\n"   /* store f10 */
-                      "    std   11,0x258(1)\n"   /* store f11 */
-                      "    std   12,0x260(1)\n"   /* store f12 */
-                      "    std   13,0x268(1)\n"   /* store f13 */
-                      "    std   14,0x270(1)\n"   /* store f14 */
-                      "    std   15,0x278(1)\n"   /* store f15 */
-                      "    stmg  0,15,0x280(1)\n" /* store general registers */
-                      "    stctg 0,15,0x380(1)\n" /* store control registers */
-                      "    oi    0x384(1),0x10\n" /* fake protection bit */
-                      "    lpswe 0(%1)"
-                      : "=m" (ctl_buf)
-		      : "a" (&dw_psw), "a" (&ctl_buf),
-		        "m" (dw_psw) : "cc", "0", "1");
+	asm volatile(
+		"	stctg	0,0,0(%2)\n"
+		"	ni	4(%2),0xef\n"	/* switch off protection */
+		"	lctlg	0,0,0(%2)\n"
+		"	lghi	1,0x1000\n"	/* gpr 1 = base for stores */
+		"	stpt	0x328(1)\n"	/* store timer */
+		"	stckc	0x330(1)\n"	/* store clock comparator */
+		"	stpx	0x318(1)\n"	/* store prefix register */
+		"	stam	0,15,0x340(1)\n"/* store access registers */
+		"	stfpc	0x31c(1)\n"	/* store fpu control */
+		"	std	0,0x200(1)\n"	/* store f0 */
+		"	std	1,0x208(1)\n"	/* store f1 */
+		"	std	2,0x210(1)\n"	/* store f2 */
+		"	std	3,0x218(1)\n"	/* store f3 */
+		"	std	4,0x220(1)\n"	/* store f4 */
+		"	std	5,0x228(1)\n"	/* store f5 */
+		"	std	6,0x230(1)\n"	/* store f6 */
+		"	std	7,0x238(1)\n"	/* store f7 */
+		"	std	8,0x240(1)\n"	/* store f8 */
+		"	std	9,0x248(1)\n"	/* store f9 */
+		"	std	10,0x250(1)\n"	/* store f10 */
+		"	std	11,0x258(1)\n"	/* store f11 */
+		"	std	12,0x260(1)\n"	/* store f12 */
+		"	std	13,0x268(1)\n"	/* store f13 */
+		"	std	14,0x270(1)\n"	/* store f14 */
+		"	std	15,0x278(1)\n"	/* store f15 */
+		"	stmg	0,15,0x280(1)\n"/* store general registers */
+		"	stctg	0,15,0x380(1)\n"/* store control registers */
+		"	oi	0x384(1),0x10\n"/* fake protection bit */
+		"	lpswe	0(%1)"
+		: "=m" (ctl_buf) : "a" (&dw_psw), "a" (&ctl_buf), "m" (dw_psw)
+		: "cc", "0", "1");	/* lghi above overwrites gpr 1 */
 #endif /* __s390x__ */
 }
 
diff --git a/include/asm-s390/ptrace.h b/include/asm-s390/ptrace.h
index 4d75d77..8d2bf65 100644
--- a/include/asm-s390/ptrace.h
+++ b/include/asm-s390/ptrace.h
@@ -479,7 +479,7 @@
 static inline void
 psw_set_key(unsigned int key)
 {
-	asm volatile ( "spka 0(%0)" : : "d" (key) );
+	asm volatile("spka 0(%0)" : : "d" (key));
 }
 
 #endif /* __ASSEMBLY__ */
diff --git a/include/asm-s390/rwsem.h b/include/asm-s390/rwsem.h
index 13ec169..90f4ecc 100644
--- a/include/asm-s390/rwsem.h
+++ b/include/asm-s390/rwsem.h
@@ -122,23 +122,23 @@
 {
 	signed long old, new;
 
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: lr   %1,%0\n"
-		"   ahi  %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	l	%0,0(%3)\n"
+		"0:	lr	%1,%0\n"
+		"	ahi	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: lgr  %1,%0\n"
-		"   aghi %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	lg	%0,0(%3)\n"
+		"0:	lgr	%1,%0\n"
+		"	aghi	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count),
-		  "i" (RWSEM_ACTIVE_READ_BIAS) : "cc", "memory" );
+		  "i" (RWSEM_ACTIVE_READ_BIAS) : "cc", "memory");
 	if (old < 0)
 		rwsem_down_read_failed(sem);
 }
@@ -150,27 +150,27 @@
 {
 	signed long old, new;
 
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: ltr  %1,%0\n"
-		"   jm   1f\n"
-		"   ahi  %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b\n"
+		"	l	%0,0(%3)\n"
+		"0:	ltr	%1,%0\n"
+		"	jm	1f\n"
+		"	ahi	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b\n"
 		"1:"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: ltgr %1,%0\n"
-		"   jm   1f\n"
-		"   aghi %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b\n"
+		"	lg	%0,0(%3)\n"
+		"0:	ltgr	%1,%0\n"
+		"	jm	1f\n"
+		"	aghi	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b\n"
 		"1:"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count),
-		  "i" (RWSEM_ACTIVE_READ_BIAS) : "cc", "memory" );
+		  "i" (RWSEM_ACTIVE_READ_BIAS) : "cc", "memory");
 	return old >= 0 ? 1 : 0;
 }
 
@@ -182,23 +182,23 @@
 	signed long old, new, tmp;
 
 	tmp = RWSEM_ACTIVE_WRITE_BIAS;
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: lr   %1,%0\n"
-		"   a    %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	l	%0,0(%3)\n"
+		"0:	lr	%1,%0\n"
+		"	a	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: lgr  %1,%0\n"
-		"   ag   %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	lg	%0,0(%3)\n"
+		"0:	lgr	%1,%0\n"
+		"	ag	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count), "m" (tmp)
-		: "cc", "memory" );
+		: "cc", "memory");
 	if (old != 0)
 		rwsem_down_write_failed(sem);
 }
@@ -215,24 +215,24 @@
 {
 	signed long old;
 
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%2)\n"
-		"0: ltr  %0,%0\n"
-		"   jnz  1f\n"
-		"   cs   %0,%4,0(%2)\n"
-		"   jl   0b\n"
+		"	l	%0,0(%2)\n"
+		"0:	ltr	%0,%0\n"
+		"	jnz	1f\n"
+		"	cs	%0,%4,0(%2)\n"
+		"	jl	0b\n"
 #else /* __s390x__ */
-		"   lg   %0,0(%2)\n"
-		"0: ltgr %0,%0\n"
-		"   jnz  1f\n"
-		"   csg  %0,%4,0(%2)\n"
-		"   jl   0b\n"
+		"	lg	%0,0(%2)\n"
+		"0:	ltgr	%0,%0\n"
+		"	jnz	1f\n"
+		"	csg	%0,%4,0(%2)\n"
+		"	jl	0b\n"
 #endif /* __s390x__ */
 		"1:"
-                : "=&d" (old), "=m" (sem->count)
+		: "=&d" (old), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count),
-		  "d" (RWSEM_ACTIVE_WRITE_BIAS) : "cc", "memory" );
+		  "d" (RWSEM_ACTIVE_WRITE_BIAS) : "cc", "memory");
 	return (old == RWSEM_UNLOCKED_VALUE) ? 1 : 0;
 }
 
@@ -243,24 +243,24 @@
 {
 	signed long old, new;
 
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: lr   %1,%0\n"
-		"   ahi  %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	l	%0,0(%3)\n"
+		"0:	lr	%1,%0\n"
+		"	ahi	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: lgr  %1,%0\n"
-		"   aghi %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	lg	%0,0(%3)\n"
+		"0:	lgr	%1,%0\n"
+		"	aghi	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count),
 		  "i" (-RWSEM_ACTIVE_READ_BIAS)
-		: "cc", "memory" );
+		: "cc", "memory");
 	if (new < 0)
 		if ((new & RWSEM_ACTIVE_MASK) == 0)
 			rwsem_wake(sem);
@@ -274,23 +274,23 @@
 	signed long old, new, tmp;
 
 	tmp = -RWSEM_ACTIVE_WRITE_BIAS;
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: lr   %1,%0\n"
-		"   a    %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	l	%0,0(%3)\n"
+		"0:	lr	%1,%0\n"
+		"	a	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: lgr  %1,%0\n"
-		"   ag   %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	lg	%0,0(%3)\n"
+		"0:	lgr	%1,%0\n"
+		"	ag	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count), "m" (tmp)
-		: "cc", "memory" );
+		: "cc", "memory");
 	if (new < 0)
 		if ((new & RWSEM_ACTIVE_MASK) == 0)
 			rwsem_wake(sem);
@@ -304,23 +304,23 @@
 	signed long old, new, tmp;
 
 	tmp = -RWSEM_WAITING_BIAS;
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: lr   %1,%0\n"
-		"   a    %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	l	%0,0(%3)\n"
+		"0:	lr	%1,%0\n"
+		"	a	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: lgr  %1,%0\n"
-		"   ag   %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	lg	%0,0(%3)\n"
+		"0:	lgr	%1,%0\n"
+		"	ag	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count), "m" (tmp)
-		: "cc", "memory" );
+		: "cc", "memory");
 	if (new > 1)
 		rwsem_downgrade_wake(sem);
 }
@@ -332,23 +332,23 @@
 {
 	signed long old, new;
 
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: lr   %1,%0\n"
-		"   ar   %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	l	%0,0(%3)\n"
+		"0:	lr	%1,%0\n"
+		"	ar	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: lgr  %1,%0\n"
-		"   agr  %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	lg	%0,0(%3)\n"
+		"0:	lgr	%1,%0\n"
+		"	agr	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count), "d" (delta)
-		: "cc", "memory" );
+		: "cc", "memory");
 }
 
 /*
@@ -358,23 +358,23 @@
 {
 	signed long old, new;
 
-	__asm__ __volatile__(
+	asm volatile(
 #ifndef __s390x__
-		"   l    %0,0(%3)\n"
-		"0: lr   %1,%0\n"
-		"   ar   %1,%5\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	l	%0,0(%3)\n"
+		"0:	lr	%1,%0\n"
+		"	ar	%1,%5\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b"
 #else /* __s390x__ */
-		"   lg   %0,0(%3)\n"
-		"0: lgr  %1,%0\n"
-		"   agr  %1,%5\n"
-		"   csg  %0,%1,0(%3)\n"
-		"   jl   0b"
+		"	lg	%0,0(%3)\n"
+		"0:	lgr	%1,%0\n"
+		"	agr	%1,%5\n"
+		"	csg	%0,%1,0(%3)\n"
+		"	jl	0b"
 #endif /* __s390x__ */
-                : "=&d" (old), "=&d" (new), "=m" (sem->count)
+		: "=&d" (old), "=&d" (new), "=m" (sem->count)
 		: "a" (&sem->count), "m" (sem->count), "d" (delta)
-		: "cc", "memory" );
+		: "cc", "memory");
 	return new;
 }
 
diff --git a/include/asm-s390/semaphore.h b/include/asm-s390/semaphore.h
index 32cdc69..dbce058 100644
--- a/include/asm-s390/semaphore.h
+++ b/include/asm-s390/semaphore.h
@@ -85,17 +85,17 @@
 	 *       sem->count.counter = --new_val;
 	 * In the ppc code this is called atomic_dec_if_positive.
 	 */
-	__asm__ __volatile__ (
-		"   l    %0,0(%3)\n"
-		"0: ltr  %1,%0\n"
-		"   jle  1f\n"
-		"   ahi  %1,-1\n"
-		"   cs   %0,%1,0(%3)\n"
-		"   jl   0b\n"
+	asm volatile(
+		"	l	%0,0(%3)\n"
+		"0:	ltr	%1,%0\n"
+		"	jle	1f\n"
+		"	ahi	%1,-1\n"
+		"	cs	%0,%1,0(%3)\n"
+		"	jl	0b\n"
 		"1:"
 		: "=&d" (old_val), "=&d" (new_val), "=m" (sem->count.counter)
 		: "a" (&sem->count.counter), "m" (sem->count.counter)
-		: "cc", "memory" );
+		: "cc", "memory");
 	return old_val <= 0;
 }
 
diff --git a/include/asm-s390/sfp-machine.h b/include/asm-s390/sfp-machine.h
index de69dfa..8ca8c77 100644
--- a/include/asm-s390/sfp-machine.h
+++ b/include/asm-s390/sfp-machine.h
@@ -76,21 +76,23 @@
 	unsigned int __r2 = (x2) + (y2);			\
 	unsigned int __r1 = (x1);				\
 	unsigned int __r0 = (x0);				\
-	__asm__ ("   alr %2,%3\n"				\
-		 "   brc 12,0f\n"				\
-		 "   lhi 0,1\n"					\
-		 "   alr %1,0\n"				\
-		 "   brc 12,0f\n"				\
-		 "   alr %0,0\n"				\
-		 "0:"						\
-		 : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0)	\
-		 : "d" (y0), "i" (1) : "cc", "0" );		\
-	__asm__ ("   alr %1,%2\n"				\
-		 "   brc 12,0f\n"				\
-		 "   ahi %0,1\n"				\
-		 "0:"						\
-		 : "+&d" (__r2), "+&d" (__r1)			\
-		 : "d" (y1) : "cc" );				\
+	asm volatile(						\
+		"	alr	%2,%3\n"			\
+		"	brc	12,0f\n"			\
+		"	lhi	0,1\n"				\
+		"	alr	%1,0\n"				\
+		"	brc	12,0f\n"			\
+		"	alr	%0,0\n"				\
+		"0:"						\
+		: "+&d" (__r2), "+&d" (__r1), "+&d" (__r0)	\
+		: "d" (y0), "i" (1) : "cc", "0");		\
+	asm volatile(						\
+		"	alr	%1,%2\n"			\
+		"	brc	12,0f\n"			\
+		"	ahi	%0,1\n"				\
+		"0:"						\
+		: "+&d" (__r2), "+&d" (__r1)			\
+		: "d" (y1) : "cc");				\
 	(r2) = __r2;						\
 	(r1) = __r1;						\
 	(r0) = __r0;						\
@@ -100,21 +102,23 @@
 	unsigned int __r2 = (x2) - (y2);			\
 	unsigned int __r1 = (x1);				\
 	unsigned int __r0 = (x0);				\
-	__asm__ ("   slr %2,%3\n"				\
-		 "   brc 3,0f\n"				\
-		 "   lhi 0,1\n"					\
-		 "   slr %1,0\n"				\
-		 "   brc 3,0f\n"				\
-		 "   slr %0,0\n"				\
-		 "0:"						\
-		 : "+&d" (__r2), "+&d" (__r1), "+&d" (__r0)	\
-		 : "d" (y0) : "cc", "0" );			\
-	__asm__ ("   slr %1,%2\n"				\
-		 "   brc 3,0f\n"				\
-		 "   ahi %0,-1\n"				\
-		 "0:"						\
-		 : "+&d" (__r2), "+&d" (__r1)			\
-		 : "d" (y1) : "cc" );				\
+	asm volatile(						\
+		"	slr	%2,%3\n"			\
+		"	brc	3,0f\n"				\
+		"	lhi	0,1\n"				\
+		"	slr	%1,0\n"				\
+		"	brc	3,0f\n"				\
+		"	slr	%0,0\n"				\
+		"0:"						\
+		: "+&d" (__r2), "+&d" (__r1), "+&d" (__r0)	\
+		: "d" (y0) : "cc", "0");			\
+	asm volatile(						\
+		"	slr	%1,%2\n"			\
+		"	brc	3,0f\n"				\
+		"	ahi	%0,-1\n"			\
+		"0:"						\
+		: "+&d" (__r2), "+&d" (__r1)			\
+		: "d" (y1) : "cc");				\
 	(r2) = __r2;						\
 	(r1) = __r1;						\
 	(r0) = __r0;						\
diff --git a/include/asm-s390/sigp.h b/include/asm-s390/sigp.h
index fc56458..e16d56f 100644
--- a/include/asm-s390/sigp.h
+++ b/include/asm-s390/sigp.h
@@ -70,16 +70,16 @@
 static inline sigp_ccode
 signal_processor(__u16 cpu_addr, sigp_order_code order_code)
 {
+	register unsigned long reg1 asm ("1") = 0; /* parameter = 0 */
 	sigp_ccode ccode;
 
-	__asm__ __volatile__(
-		"    sr     1,1\n"        /* parameter=0 in gpr 1 */
-		"    sigp   1,%1,0(%2)\n"
-		"    ipm    %0\n"
-		"    srl    %0,28\n"
-		: "=d" (ccode)
-		: "d" (__cpu_logical_map[cpu_addr]), "a" (order_code)
-		: "cc" , "memory", "1" );
+	asm volatile(
+		"	sigp	%1,%2,0(%3)\n"	/* status may come back in %1 */
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (ccode), "+d" (reg1)
+		: "d" (__cpu_logical_map[cpu_addr]), "a" (order_code)
+		: "cc" , "memory");
 	return ccode;
 }
 
@@ -87,20 +87,18 @@
  * Signal processor with parameter
  */
 static inline sigp_ccode
-signal_processor_p(__u32 parameter, __u16 cpu_addr,
-		   sigp_order_code order_code)
+signal_processor_p(__u32 parameter, __u16 cpu_addr, sigp_order_code order_code)
 {
+	register unsigned int reg1 asm ("1") = parameter;
 	sigp_ccode ccode;
-	
-	__asm__ __volatile__(
-		"    lr     1,%1\n"       /* parameter in gpr 1 */
-		"    sigp   1,%2,0(%3)\n"
-		"    ipm    %0\n"
-		"    srl    %0,28\n"
+
+	asm volatile(
+		"	sigp	%1,%2,0(%3)\n"	/* status may come back in %1 */
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
-		: "=d" (ccode)
-		: "d" (parameter), "d" (__cpu_logical_map[cpu_addr]),
-                  "a" (order_code)
-		: "cc" , "memory", "1" );
+		: "=d" (ccode), "+d" (reg1)
+		: "d" (__cpu_logical_map[cpu_addr]), "a" (order_code)
+		: "cc" , "memory");
 	return ccode;
 }
 
@@ -108,24 +106,21 @@
  * Signal processor with parameter and return status
  */
 static inline sigp_ccode
-signal_processor_ps(__u32 *statusptr, __u32 parameter,
-		    __u16 cpu_addr, sigp_order_code order_code)
+signal_processor_ps(__u32 *statusptr, __u32 parameter, __u16 cpu_addr,
+		    sigp_order_code order_code)
 {
+	register unsigned int reg1 asm ("1") = parameter;
 	sigp_ccode ccode;
-	
-	__asm__ __volatile__(
-		"    sr     2,2\n"        /* clear status */
-		"    lr     3,%2\n"       /* parameter in gpr 3 */
-		"    sigp   2,%3,0(%4)\n"
-		"    st     2,%1\n"
-		"    ipm    %0\n"
-		"    srl    %0,28\n"
-		: "=d" (ccode), "=m" (*statusptr)
-		: "d" (parameter), "d" (__cpu_logical_map[cpu_addr]),
-                  "a" (order_code)
-		: "cc" , "memory", "2" , "3"
-		);
-   return ccode;
+
+	asm volatile(
+		"	sigp	%1,%2,0(%3)\n"
+		"	ipm	%0\n"
+		"	srl	%0,28\n"
+		: "=d" (ccode), "+d" (reg1)
+		: "d" (__cpu_logical_map[cpu_addr]), "a" (order_code)
+		: "cc" , "memory");
+	*statusptr = reg1;
+	return ccode;
 }
 
 #endif /* __SIGP__ */
diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h
index 9fb02e9..c3cf030 100644
--- a/include/asm-s390/smp.h
+++ b/include/asm-s390/smp.h
@@ -56,7 +56,7 @@
 {
         __u16 cpu_address;
  
-        __asm__ ("stap %0\n" : "=m" (cpu_address));
+	asm volatile("stap %0" : "=m" (cpu_address));
         return cpu_address;
 }
 
diff --git a/include/asm-s390/spinlock.h b/include/asm-s390/spinlock.h
index 273dbec..ce3edf6d6 100644
--- a/include/asm-s390/spinlock.h
+++ b/include/asm-s390/spinlock.h
@@ -11,17 +11,36 @@
 #ifndef __ASM_SPINLOCK_H
 #define __ASM_SPINLOCK_H
 
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+
 static inline int
 _raw_compare_and_swap(volatile unsigned int *lock,
 		      unsigned int old, unsigned int new)
 {
-	asm volatile ("cs %0,%3,0(%4)"
-		      : "=d" (old), "=m" (*lock)
-		      : "0" (old), "d" (new), "a" (lock), "m" (*lock)
-		      : "cc", "memory" );
+	asm volatile(
+		"	cs	%0,%3,%1"
+		: "=d" (old), "=Q" (*lock)
+		: "0" (old), "d" (new), "Q" (*lock)
+		: "cc", "memory" );
 	return old;
 }
 
+#else /* __GNUC__ */
+
+static inline int
+_raw_compare_and_swap(volatile unsigned int *lock,
+		      unsigned int old, unsigned int new)
+{
+	asm volatile(
+		"	cs	%0,%3,0(%4)"
+		: "=d" (old), "=m" (*lock)
+		: "0" (old), "d" (new), "a" (lock), "m" (*lock)
+		: "cc", "memory" );
+	return old;
+}
+
+#endif /* __GNUC__ */
+
 /*
  * Simple spin lock operations.  There are two variants, one clears IRQ's
  * on the local processor, one does not.
diff --git a/include/asm-s390/string.h b/include/asm-s390/string.h
index 23a4c39..d074673 100644
--- a/include/asm-s390/string.h
+++ b/include/asm-s390/string.h
@@ -60,12 +60,13 @@
 	register int r0 asm("0") = (char) c;
 	const void *ret = s + n;
 
-	asm volatile ("0: srst  %0,%1\n"
-		      "   jo    0b\n"
-		      "   jl	1f\n"
-		      "   la    %0,0\n"
-		      "1:"
-		      : "+a" (ret), "+&a" (s) : "d" (r0) : "cc" );
+	asm volatile(
+		"0:	srst	%0,%1\n"
+		"	jo	0b\n"
+		"	jl	1f\n"
+		"	la	%0,0\n"
+		"1:"
+		: "+a" (ret), "+&a" (s) : "d" (r0) : "cc");
 	return (void *) ret;
 }
 
@@ -74,9 +75,10 @@
 	register int r0 asm("0") = (char) c;
 	const void *ret = s + n;
 
-	asm volatile ("0: srst  %0,%1\n"
-		      "   jo    0b\n"
-		      : "+a" (ret), "+&a" (s) : "d" (r0) : "cc" );
+	asm volatile(
+		"0:	srst	%0,%1\n"
+		"	jo	0b\n"
+		: "+a" (ret), "+&a" (s) : "d" (r0) : "cc");
 	return (void *) ret;
 }
 
@@ -86,12 +88,13 @@
 	unsigned long dummy;
 	char *ret = dst;
 
-	asm volatile ("0: srst  %0,%1\n"
-		      "   jo    0b\n"
-		      "1: mvst  %0,%2\n"
-		      "   jo    1b"
-		      : "=&a" (dummy), "+a" (dst), "+a" (src)
-		      : "d" (r0), "0" (0) : "cc", "memory" );
+	asm volatile(
+		"0:	srst	%0,%1\n"
+		"	jo	0b\n"
+		"1:	mvst	%0,%2\n"
+		"	jo	1b"
+		: "=&a" (dummy), "+a" (dst), "+a" (src)
+		: "d" (r0), "0" (0) : "cc", "memory" );
 	return ret;
 }
 
@@ -100,10 +103,11 @@
 	register int r0 asm("0") = 0;
 	char *ret = dst;
 
-	asm volatile ("0: mvst  %0,%1\n"
-		      "   jo    0b"
-		      : "+&a" (dst), "+&a" (src) : "d" (r0)
-		      : "cc", "memory" );
+	asm volatile(
+		"0:	mvst	%0,%1\n"
+		"	jo	0b"
+		: "+&a" (dst), "+&a" (src) : "d" (r0)
+		: "cc", "memory");
 	return ret;
 }
 
@@ -112,9 +116,10 @@
 	register unsigned long r0 asm("0") = 0;
 	const char *tmp = s;
 
-	asm volatile ("0: srst  %0,%1\n"
-		      "   jo    0b"
-		      : "+d" (r0), "+a" (tmp) :  : "cc" );
+	asm volatile(
+		"0:	srst	%0,%1\n"
+		"	jo	0b"
+		: "+d" (r0), "+a" (tmp) :  : "cc");
 	return r0 - (unsigned long) s;
 }
 
@@ -124,9 +129,10 @@
 	const char *tmp = s;
 	const char *end = s + n;
 
-	asm volatile ("0: srst  %0,%1\n"
-		      "   jo    0b"
-		      : "+a" (end), "+a" (tmp) : "d" (r0)  : "cc" );
+	asm volatile(
+		"0:	srst	%0,%1\n"
+		"	jo	0b"
+		: "+a" (end), "+a" (tmp) : "d" (r0)  : "cc");
 	return end - s;
 }
 
diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h
index 1604004..ccbafe4 100644
--- a/include/asm-s390/system.h
+++ b/include/asm-s390/system.h
@@ -23,74 +23,68 @@
 
 extern struct task_struct *__switch_to(void *, void *);
 
-#ifdef __s390x__
-#define __FLAG_SHIFT 56
-#else /* ! __s390x__ */
-#define __FLAG_SHIFT 24
-#endif /* ! __s390x__ */
-
 static inline void save_fp_regs(s390_fp_regs *fpregs)
 {
-	asm volatile (
-		"   std   0,8(%1)\n"
-		"   std   2,24(%1)\n"
-		"   std   4,40(%1)\n"
-		"   std   6,56(%1)"
-		: "=m" (*fpregs) : "a" (fpregs), "m" (*fpregs) : "memory" );
+	asm volatile(
+		"	std	0,8(%1)\n"
+		"	std	2,24(%1)\n"
+		"	std	4,40(%1)\n"
+		"	std	6,56(%1)"
+		: "=m" (*fpregs) : "a" (fpregs), "m" (*fpregs) : "memory");
 	if (!MACHINE_HAS_IEEE)
 		return;
 	asm volatile(
-		"   stfpc 0(%1)\n"
-		"   std   1,16(%1)\n"
-		"   std   3,32(%1)\n"
-		"   std   5,48(%1)\n"
-		"   std   7,64(%1)\n"
-		"   std   8,72(%1)\n"
-		"   std   9,80(%1)\n"
-		"   std   10,88(%1)\n"
-		"   std   11,96(%1)\n"
-		"   std   12,104(%1)\n"
-		"   std   13,112(%1)\n"
-		"   std   14,120(%1)\n"
-		"   std   15,128(%1)\n"
-		: "=m" (*fpregs) : "a" (fpregs), "m" (*fpregs) : "memory" );
+		"	stfpc	0(%1)\n"
+		"	std	1,16(%1)\n"
+		"	std	3,32(%1)\n"
+		"	std	5,48(%1)\n"
+		"	std	7,64(%1)\n"
+		"	std	8,72(%1)\n"
+		"	std	9,80(%1)\n"
+		"	std	10,88(%1)\n"
+		"	std	11,96(%1)\n"
+		"	std	12,104(%1)\n"
+		"	std	13,112(%1)\n"
+		"	std	14,120(%1)\n"
+		"	std	15,128(%1)\n"
+		: "=m" (*fpregs) : "a" (fpregs), "m" (*fpregs) : "memory");
 }
 
 static inline void restore_fp_regs(s390_fp_regs *fpregs)
 {
-	asm volatile (
-		"   ld    0,8(%0)\n"
-		"   ld    2,24(%0)\n"
-		"   ld    4,40(%0)\n"
-		"   ld    6,56(%0)"
-		: : "a" (fpregs), "m" (*fpregs) );
+	asm volatile(
+		"	ld	0,8(%0)\n"
+		"	ld	2,24(%0)\n"
+		"	ld	4,40(%0)\n"
+		"	ld	6,56(%0)"
+		: : "a" (fpregs), "m" (*fpregs));
 	if (!MACHINE_HAS_IEEE)
 		return;
 	asm volatile(
-		"   lfpc  0(%0)\n"
-		"   ld    1,16(%0)\n"
-		"   ld    3,32(%0)\n"
-		"   ld    5,48(%0)\n"
-		"   ld    7,64(%0)\n"
-		"   ld    8,72(%0)\n"
-		"   ld    9,80(%0)\n"
-		"   ld    10,88(%0)\n"
-		"   ld    11,96(%0)\n"
-		"   ld    12,104(%0)\n"
-		"   ld    13,112(%0)\n"
-		"   ld    14,120(%0)\n"
-		"   ld    15,128(%0)\n"
-		: : "a" (fpregs), "m" (*fpregs) );
+		"	lfpc	0(%0)\n"
+		"	ld	1,16(%0)\n"
+		"	ld	3,32(%0)\n"
+		"	ld	5,48(%0)\n"
+		"	ld	7,64(%0)\n"
+		"	ld	8,72(%0)\n"
+		"	ld	9,80(%0)\n"
+		"	ld	10,88(%0)\n"
+		"	ld	11,96(%0)\n"
+		"	ld	12,104(%0)\n"
+		"	ld	13,112(%0)\n"
+		"	ld	14,120(%0)\n"
+		"	ld	15,128(%0)\n"
+		: : "a" (fpregs), "m" (*fpregs));
 }
 
 static inline void save_access_regs(unsigned int *acrs)
 {
-	asm volatile ("stam 0,15,0(%0)" : : "a" (acrs) : "memory" );
+	asm volatile("stam 0,15,0(%0)" : : "a" (acrs) : "memory");
 }
 
 static inline void restore_access_regs(unsigned int *acrs)
 {
-	asm volatile ("lam 0,15,0(%0)" : : "a" (acrs) );
+	asm volatile("lam 0,15,0(%0)" : : "a" (acrs));
 }
 
 #define switch_to(prev,next,last) do {					     \
@@ -126,7 +120,7 @@
 	account_vtime(prev);						     \
 } while (0)
 
-#define nop() __asm__ __volatile__ ("nop")
+#define nop() asm volatile("nop")
 
 #define xchg(ptr,x)							  \
 ({									  \
@@ -147,15 +141,15 @@
 		shift = (3 ^ (addr & 3)) << 3;
 		addr ^= addr & 3;
 		asm volatile(
-			"    l   %0,0(%4)\n"
-			"0:  lr  0,%0\n"
-			"    nr  0,%3\n"
-			"    or  0,%2\n"
-			"    cs  %0,0,0(%4)\n"
-			"    jl  0b\n"
+			"	l	%0,0(%4)\n"
+			"0:	lr	0,%0\n"
+			"	nr	0,%3\n"
+			"	or	0,%2\n"
+			"	cs	%0,0,0(%4)\n"
+			"	jl	0b\n"
 			: "=&d" (old), "=m" (*(int *) addr)
 			: "d" (x << shift), "d" (~(255 << shift)), "a" (addr),
-			  "m" (*(int *) addr) : "memory", "cc", "0" );
+			  "m" (*(int *) addr) : "memory", "cc", "0");
 		x = old >> shift;
 		break;
 	case 2:
@@ -163,36 +157,36 @@
 		shift = (2 ^ (addr & 2)) << 3;
 		addr ^= addr & 2;
 		asm volatile(
-			"    l   %0,0(%4)\n"
-			"0:  lr  0,%0\n"
-			"    nr  0,%3\n"
-			"    or  0,%2\n"
-			"    cs  %0,0,0(%4)\n"
-			"    jl  0b\n"
+			"	l	%0,0(%4)\n"
+			"0:	lr	0,%0\n"
+			"	nr	0,%3\n"
+			"	or	0,%2\n"
+			"	cs	%0,0,0(%4)\n"
+			"	jl	0b\n"
 			: "=&d" (old), "=m" (*(int *) addr)
 			: "d" (x << shift), "d" (~(65535 << shift)), "a" (addr),
-			  "m" (*(int *) addr) : "memory", "cc", "0" );
+			  "m" (*(int *) addr) : "memory", "cc", "0");
 		x = old >> shift;
 		break;
 	case 4:
-		asm volatile (
-			"    l   %0,0(%3)\n"
-			"0:  cs  %0,%2,0(%3)\n"
-			"    jl  0b\n"
+		asm volatile(
+			"	l	%0,0(%3)\n"
+			"0:	cs	%0,%2,0(%3)\n"
+			"	jl	0b\n"
 			: "=&d" (old), "=m" (*(int *) ptr)
 			: "d" (x), "a" (ptr), "m" (*(int *) ptr)
-			: "memory", "cc" );
+			: "memory", "cc");
 		x = old;
 		break;
 #ifdef __s390x__
 	case 8:
-		asm volatile (
-			"    lg  %0,0(%3)\n"
-			"0:  csg %0,%2,0(%3)\n"
-			"    jl  0b\n"
+		asm volatile(
+			"	lg	%0,0(%3)\n"
+			"0:	csg	%0,%2,0(%3)\n"
+			"	jl	0b\n"
 			: "=&d" (old), "=m" (*(long *) ptr)
 			: "d" (x), "a" (ptr), "m" (*(long *) ptr)
-			: "memory", "cc" );
+			: "memory", "cc");
 		x = old;
 		break;
 #endif /* __s390x__ */
@@ -224,55 +218,55 @@
 		shift = (3 ^ (addr & 3)) << 3;
 		addr ^= addr & 3;
 		asm volatile(
-			"    l   %0,0(%4)\n"
-			"0:  nr  %0,%5\n"
-                        "    lr  %1,%0\n"
-			"    or  %0,%2\n"
-			"    or  %1,%3\n"
-			"    cs  %0,%1,0(%4)\n"
-			"    jnl 1f\n"
-			"    xr  %1,%0\n"
-			"    nr  %1,%5\n"
-			"    jnz 0b\n"
+			"	l	%0,0(%4)\n"
+			"0:	nr	%0,%5\n"
+			"	lr	%1,%0\n"
+			"	or	%0,%2\n"
+			"	or	%1,%3\n"
+			"	cs	%0,%1,0(%4)\n"
+			"	jnl	1f\n"
+			"	xr	%1,%0\n"
+			"	nr	%1,%5\n"
+			"	jnz	0b\n"
 			"1:"
 			: "=&d" (prev), "=&d" (tmp)
 			: "d" (old << shift), "d" (new << shift), "a" (ptr),
 			  "d" (~(255 << shift))
-			: "memory", "cc" );
+			: "memory", "cc");
 		return prev >> shift;
 	case 2:
 		addr = (unsigned long) ptr;
 		shift = (2 ^ (addr & 2)) << 3;
 		addr ^= addr & 2;
 		asm volatile(
-			"    l   %0,0(%4)\n"
-			"0:  nr  %0,%5\n"
-                        "    lr  %1,%0\n"
-			"    or  %0,%2\n"
-			"    or  %1,%3\n"
-			"    cs  %0,%1,0(%4)\n"
-			"    jnl 1f\n"
-			"    xr  %1,%0\n"
-			"    nr  %1,%5\n"
-			"    jnz 0b\n"
+			"	l	%0,0(%4)\n"
+			"0:	nr	%0,%5\n"
+			"	lr	%1,%0\n"
+			"	or	%0,%2\n"
+			"	or	%1,%3\n"
+			"	cs	%0,%1,0(%4)\n"
+			"	jnl	1f\n"
+			"	xr	%1,%0\n"
+			"	nr	%1,%5\n"
+			"	jnz	0b\n"
 			"1:"
 			: "=&d" (prev), "=&d" (tmp)
 			: "d" (old << shift), "d" (new << shift), "a" (ptr),
 			  "d" (~(65535 << shift))
-			: "memory", "cc" );
+			: "memory", "cc");
 		return prev >> shift;
 	case 4:
-		asm volatile (
-			"    cs  %0,%2,0(%3)\n"
+		asm volatile(
+			"	cs	%0,%2,0(%3)\n"
 			: "=&d" (prev) : "0" (old), "d" (new), "a" (ptr)
-			: "memory", "cc" );
+			: "memory", "cc");
 		return prev;
 #ifdef __s390x__
 	case 8:
-		asm volatile (
-			"    csg %0,%2,0(%3)\n"
+		asm volatile(
+			"	csg	%0,%2,0(%3)\n"
 			: "=&d" (prev) : "0" (old), "d" (new), "a" (ptr)
-			: "memory", "cc" );
+			: "memory", "cc");
 		return prev;
 #endif /* __s390x__ */
         }
@@ -289,8 +283,8 @@
  * all memory ops have completed wrt other CPU's ( see 7-15 POP  DJB ).
  */
 
-#define eieio()  __asm__ __volatile__ ( "bcr 15,0" : : : "memory" ) 
-# define SYNC_OTHER_CORES(x)   eieio() 
+#define eieio()	asm volatile("bcr 15,0" : : : "memory")
+#define SYNC_OTHER_CORES(x)   eieio()
 #define mb()    eieio()
 #define rmb()   eieio()
 #define wmb()   eieio()
@@ -307,117 +301,56 @@
 
 #ifdef __s390x__
 
-#define __ctl_load(array, low, high) ({ \
-	typedef struct { char _[sizeof(array)]; } addrtype; \
-	__asm__ __volatile__ ( \
-		"   bras  1,0f\n" \
-                "   lctlg 0,0,0(%0)\n" \
-		"0: ex    %1,0(1)" \
-		: : "a" (&array), "a" (((low)<<4)+(high)), \
-		    "m" (*(addrtype *)(array)) : "1" ); \
+#define __ctl_load(array, low, high) ({				\
+	typedef struct { char _[sizeof(array)]; } addrtype;	\
+	asm volatile( /* "m" via &array so scalars work too */	\
+		"	lctlg	%1,%2,0(%0)\n"			\
+		: : "a" (&array), "i" (low), "i" (high),	\
+		    "m" (*(addrtype *)(&array)));		\
 	})
 
-#define __ctl_store(array, low, high) ({ \
-	typedef struct { char _[sizeof(array)]; } addrtype; \
-	__asm__ __volatile__ ( \
-		"   bras  1,0f\n" \
-		"   stctg 0,0,0(%1)\n" \
-		"0: ex    %2,0(1)" \
-		: "=m" (*(addrtype *)(array)) \
-		: "a" (&array), "a" (((low)<<4)+(high)) : "1" ); \
+#define __ctl_store(array, low, high) ({			\
+	typedef struct { char _[sizeof(array)]; } addrtype;	\
+	asm volatile( /* "=m" via &array so scalars work too */	\
+		"	stctg	%2,%3,0(%1)\n"			\
+		: "=m" (*(addrtype *)(&array))			\
+		: "a" (&array), "i" (low), "i" (high));		\
 	})
 
-#define __ctl_set_bit(cr, bit) ({ \
-        __u8 __dummy[24]; \
-        __asm__ __volatile__ ( \
-                "    bras  1,0f\n"       /* skip indirect insns */ \
-                "    stctg 0,0,0(%1)\n" \
-                "    lctlg 0,0,0(%1)\n" \
-                "0:  ex    %2,0(1)\n"    /* execute stctl */ \
-                "    lg    0,0(%1)\n" \
-                "    ogr   0,%3\n"       /* set the bit */ \
-                "    stg   0,0(%1)\n" \
-                "1:  ex    %2,6(1)"      /* execute lctl */ \
-                : "=m" (__dummy) \
-		: "a" ((((unsigned long) &__dummy) + 7) & ~7UL), \
-		  "a" (cr*17), "a" (1L<<(bit)) \
-                : "cc", "0", "1" ); \
-        })
-
-#define __ctl_clear_bit(cr, bit) ({ \
-        __u8 __dummy[16]; \
-        __asm__ __volatile__ ( \
-                "    bras  1,0f\n"       /* skip indirect insns */ \
-                "    stctg 0,0,0(%1)\n" \
-                "    lctlg 0,0,0(%1)\n" \
-                "0:  ex    %2,0(1)\n"    /* execute stctl */ \
-                "    lg    0,0(%1)\n" \
-                "    ngr   0,%3\n"       /* set the bit */ \
-                "    stg   0,0(%1)\n" \
-                "1:  ex    %2,6(1)"      /* execute lctl */ \
-                : "=m" (__dummy) \
-		: "a" ((((unsigned long) &__dummy) + 7) & ~7UL), \
-		  "a" (cr*17), "a" (~(1L<<(bit))) \
-                : "cc", "0", "1" ); \
-        })
-
 #else /* __s390x__ */
 
-#define __ctl_load(array, low, high) ({ \
-	typedef struct { char _[sizeof(array)]; } addrtype; \
-	__asm__ __volatile__ ( \
-		"   bras  1,0f\n" \
-                "   lctl 0,0,0(%0)\n" \
-		"0: ex    %1,0(1)" \
-		: : "a" (&array), "a" (((low)<<4)+(high)), \
-		    "m" (*(addrtype *)(array)) : "1" ); \
+#define __ctl_load(array, low, high) ({				\
+	typedef struct { char _[sizeof(array)]; } addrtype;	\
+	asm volatile( /* "m" via &array so scalars work too */	\
+		"	lctl	%1,%2,0(%0)\n"			\
+		: : "a" (&array), "i" (low), "i" (high),	\
+		    "m" (*(addrtype *)(&array)));		\
+})
+
+#define __ctl_store(array, low, high) ({			\
+	typedef struct { char _[sizeof(array)]; } addrtype;	\
+	asm volatile( /* "=m" via &array so scalars work too */	\
+		"	stctl	%2,%3,0(%1)\n"			\
+		: "=m" (*(addrtype *)(&array))			\
+		: "a" (&array), "i" (low), "i" (high));		\
 	})
 
-#define __ctl_store(array, low, high) ({ \
-	typedef struct { char _[sizeof(array)]; } addrtype; \
-	__asm__ __volatile__ ( \
-		"   bras  1,0f\n" \
-		"   stctl 0,0,0(%1)\n" \
-		"0: ex    %2,0(1)" \
-		: "=m" (*(addrtype *)(array)) \
-		: "a" (&array), "a" (((low)<<4)+(high)): "1" ); \
-	})
-
-#define __ctl_set_bit(cr, bit) ({ \
-        __u8 __dummy[16]; \
-        __asm__ __volatile__ ( \
-                "    bras  1,0f\n"       /* skip indirect insns */ \
-                "    stctl 0,0,0(%1)\n" \
-                "    lctl  0,0,0(%1)\n" \
-                "0:  ex    %2,0(1)\n"    /* execute stctl */ \
-                "    l     0,0(%1)\n" \
-                "    or    0,%3\n"       /* set the bit */ \
-                "    st    0,0(%1)\n" \
-                "1:  ex    %2,4(1)"      /* execute lctl */ \
-                : "=m" (__dummy) \
-		: "a" ((((unsigned long) &__dummy) + 7) & ~7UL), \
-		  "a" (cr*17), "a" (1<<(bit)) \
-                : "cc", "0", "1" ); \
-        })
-
-#define __ctl_clear_bit(cr, bit) ({ \
-        __u8 __dummy[16]; \
-        __asm__ __volatile__ ( \
-                "    bras  1,0f\n"       /* skip indirect insns */ \
-                "    stctl 0,0,0(%1)\n" \
-                "    lctl  0,0,0(%1)\n" \
-                "0:  ex    %2,0(1)\n"    /* execute stctl */ \
-                "    l     0,0(%1)\n" \
-                "    nr    0,%3\n"       /* set the bit */ \
-                "    st    0,0(%1)\n" \
-                "1:  ex    %2,4(1)"      /* execute lctl */ \
-                : "=m" (__dummy) \
-		: "a" ((((unsigned long) &__dummy) + 7) & ~7UL), \
-		  "a" (cr*17), "a" (~(1<<(bit))) \
-                : "cc", "0", "1" ); \
-        })
 #endif /* __s390x__ */
 
+#define __ctl_set_bit(cr, bit) ({	\
+	unsigned long __dummy;		\
+	__ctl_store(__dummy, cr, cr);	\
+	__dummy |= 1UL << (bit);	\
+	__ctl_load(__dummy, cr, cr);	\
+})
+
+#define __ctl_clear_bit(cr, bit) ({	\
+	unsigned long __dummy;		\
+	__ctl_store(__dummy, cr, cr);	\
+	__dummy &= ~(1UL << (bit));	\
+	__ctl_load(__dummy, cr, cr);	\
+})
+
 #include <linux/irqflags.h>
 
 /*
@@ -427,8 +360,7 @@
 static inline void
 __set_psw_mask(unsigned long mask)
 {
-	local_save_flags(mask);
-	__load_psw_mask(mask);
+	__load_psw_mask(mask | (__raw_local_irq_stosm(0x00) & ~(-1UL >> 8)));
 }
 
 #define local_mcck_enable()  __set_psw_mask(PSW_KERNEL_BITS)
diff --git a/include/asm-s390/timex.h b/include/asm-s390/timex.h
index 5d0332a..4df4a41 100644
--- a/include/asm-s390/timex.h
+++ b/include/asm-s390/timex.h
@@ -15,20 +15,21 @@
 
 typedef unsigned long long cycles_t;
 
-static inline cycles_t get_cycles(void)
-{
-	cycles_t cycles;
-
-	__asm__ __volatile__ ("stck 0(%1)" : "=m" (cycles) : "a" (&cycles) : "cc");
-	return cycles >> 2;
-}
-
 static inline unsigned long long get_clock (void)
 {
 	unsigned long long clk;
 
-	__asm__ __volatile__ ("stck 0(%1)" : "=m" (clk) : "a" (&clk) : "cc");
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
+	asm volatile("stck %0" : "=Q" (clk) : : "cc");
+#else /* __GNUC__ */
+	asm volatile("stck 0(%1)" : "=m" (clk) : "a" (&clk) : "cc");
+#endif /* __GNUC__ */
 	return clk;
 }
 
+static inline cycles_t get_cycles(void)
+{
+	return (cycles_t) get_clock() >> 2;
+}
+
 #endif
diff --git a/include/asm-s390/tlbflush.h b/include/asm-s390/tlbflush.h
index 73cd85b..fa4dc91 100644
--- a/include/asm-s390/tlbflush.h
+++ b/include/asm-s390/tlbflush.h
@@ -25,7 +25,7 @@
  */
 
 #define local_flush_tlb() \
-do {  __asm__ __volatile__("ptlb": : :"memory"); } while (0)
+do {  asm volatile("ptlb": : :"memory"); } while (0)
 
 #ifndef CONFIG_SMP
 
@@ -68,24 +68,24 @@
 
 static inline void global_flush_tlb(void)
 {
+	register unsigned long reg2 asm("2");
+	register unsigned long reg3 asm("3");
+	register unsigned long reg4 asm("4");
+	long dummy;
+
 #ifndef __s390x__
 	if (!MACHINE_HAS_CSP) {
 		smp_ptlb_all();
 		return;
 	}
 #endif /* __s390x__ */
-	{
-		register unsigned long addr asm("4");
-		long dummy;
 
-		dummy = 0;
-		addr = ((unsigned long) &dummy) + 1;
-		__asm__ __volatile__ (
-			"    slr  2,2\n"
-			"    slr  3,3\n"
-			"    csp  2,%0"
-			: : "a" (addr), "m" (dummy) : "cc", "2", "3" );
-	}
+	dummy = 0;
+	reg2 = reg3 = 0;
+	reg4 = ((unsigned long) &dummy) + 1;
+	asm volatile(
+		"	csp	%0,%2"
+		: : "d" (reg2), "d" (reg3), "d" (reg4), "m" (dummy) : "cc" );
 }
 
 /*
@@ -102,9 +102,9 @@
 	if (unlikely(cpus_empty(mm->cpu_vm_mask)))
 		return;
 	if (MACHINE_HAS_IDTE) {
-		asm volatile (".insn rrf,0xb98e0000,0,%0,%1,0"
-			      : : "a" (2048),
-			      "a" (__pa(mm->pgd)&PAGE_MASK) : "cc" );
+		asm volatile(
+			"	.insn	rrf,0xb98e0000,0,%0,%1,0"
+			: : "a" (2048), "a" (__pa(mm->pgd)&PAGE_MASK) : "cc");
 		return;
 	}
 	preempt_disable();
diff --git a/include/asm-s390/uaccess.h b/include/asm-s390/uaccess.h
index e2047b0..72ae4ef 100644
--- a/include/asm-s390/uaccess.h
+++ b/include/asm-s390/uaccess.h
@@ -38,25 +38,14 @@
 #define get_ds()        (KERNEL_DS)
 #define get_fs()        (current->thread.mm_segment)
 
-#ifdef __s390x__
 #define set_fs(x) \
 ({									\
 	unsigned long __pto;						\
 	current->thread.mm_segment = (x);				\
 	__pto = current->thread.mm_segment.ar4 ?			\
 		S390_lowcore.user_asce : S390_lowcore.kernel_asce;	\
-	asm volatile ("lctlg 7,7,%0" : : "m" (__pto) );			\
+	__ctl_load(__pto, 7, 7);					\
 })
-#else /* __s390x__ */
-#define set_fs(x) \
-({									\
-	unsigned long __pto;						\
-	current->thread.mm_segment = (x);				\
-	__pto = current->thread.mm_segment.ar4 ?			\
-		S390_lowcore.user_asce : S390_lowcore.kernel_asce;	\
-	asm volatile ("lctl  7,7,%0" : : "m" (__pto) );			\
-})
-#endif /* __s390x__ */
 
 #define segment_eq(a,b) ((a).ar4 == (b).ar4)
 
diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h
index d49c54c..0361ac5 100644
--- a/include/asm-s390/unistd.h
+++ b/include/asm-s390/unistd.h
@@ -355,145 +355,145 @@
 
 #define _svc_clobber "1", "cc", "memory"
 
-#define _syscall0(type,name)				     \
-type name(void) {					     \
-	register long __svcres asm("2");		     \
-	long __res;					     \
-	__asm__ __volatile__ (				     \
-		"    .if %1 < 256\n"			     \
-		"    svc %b1\n"				     \
-		"    .else\n"				     \
-		"    la  %%r1,%1\n"			     \
-		"    svc 0\n"				     \
-		"    .endif"				     \
-		: "=d" (__svcres)			     \
-		: "i" (__NR_##name)			     \
-		: _svc_clobber );			     \
-	__res = __svcres;				     \
-	__syscall_return(type,__res);			     \
+#define _syscall0(type,name)					\
+type name(void) {						\
+	register long __svcres asm("2");			\
+	long __res;						\
+	asm volatile(						\
+		"	.if	%1 < 256\n"			\
+		"	svc	%b1\n"				\
+		"	.else\n"				\
+		"	la	%%r1,%1\n"			\
+		"	svc	0\n"				\
+		"	.endif"					\
+		: "=d" (__svcres)				\
+		: "i" (__NR_##name)				\
+		: _svc_clobber);				\
+	__res = __svcres;					\
+	__syscall_return(type,__res);				\
 }
 
-#define _syscall1(type,name,type1,arg1)			     \
-type name(type1 arg1) {					     \
-	register type1 __arg1 asm("2") = arg1;		     \
-	register long __svcres asm("2");		     \
-	long __res;					     \
-	__asm__ __volatile__ (				     \
-		"    .if %1 < 256\n"			     \
-		"    svc %b1\n"				     \
-		"    .else\n"				     \
-		"    la  %%r1,%1\n"			     \
-		"    svc 0\n"				     \
-		"    .endif"				     \
-		: "=d" (__svcres)			     \
-		: "i" (__NR_##name),			     \
-		  "0" (__arg1)				     \
-		: _svc_clobber );			     \
-	__res = __svcres;				     \
-	__syscall_return(type,__res);			     \
+#define _syscall1(type,name,type1,arg1)				\
+type name(type1 arg1) {						\
+	register type1 __arg1 asm("2") = arg1;			\
+	register long __svcres asm("2");			\
+	long __res;						\
+	asm volatile(						\
+		"	.if	%1 < 256\n"			\
+		"	svc	%b1\n"				\
+		"	.else\n"				\
+		"	la	%%r1,%1\n"			\
+		"	svc	0\n"				\
+		"	.endif"					\
+		: "=d" (__svcres)				\
+		: "i" (__NR_##name),				\
+		  "0" (__arg1)					\
+		: _svc_clobber);				\
+	__res = __svcres;					\
+	__syscall_return(type,__res);				\
 }
 
-#define _syscall2(type,name,type1,arg1,type2,arg2)	     \
-type name(type1 arg1, type2 arg2) {			     \
-	register type1 __arg1 asm("2") = arg1;		     \
-	register type2 __arg2 asm("3") = arg2;		     \
-	register long __svcres asm("2");		     \
-	long __res;					     \
-	__asm__ __volatile__ (				     \
-		"    .if %1 < 256\n"			     \
-		"    svc %b1\n"				     \
-		"    .else\n"				     \
-		"    la %%r1,%1\n"			     \
-		"    svc 0\n"				     \
-		"    .endif"				     \
-		: "=d" (__svcres)			     \
-		: "i" (__NR_##name),			     \
-		  "0" (__arg1),				     \
-		  "d" (__arg2)				     \
-		: _svc_clobber );			     \
-	__res = __svcres;				     \
-	__syscall_return(type,__res);			     \
+#define _syscall2(type,name,type1,arg1,type2,arg2)		\
+type name(type1 arg1, type2 arg2) {				\
+	register type1 __arg1 asm("2") = arg1;			\
+	register type2 __arg2 asm("3") = arg2;			\
+	register long __svcres asm("2");			\
+	long __res;						\
+	asm volatile(						\
+		"	.if	%1 < 256\n"			\
+		"	svc	%b1\n"				\
+		"	.else\n"				\
+		"	la	%%r1,%1\n"			\
+		"	svc	0\n"				\
+		"	.endif"					\
+		: "=d" (__svcres)				\
+		: "i" (__NR_##name),				\
+		  "0" (__arg1),					\
+		  "d" (__arg2)					\
+		: _svc_clobber );				\
+	__res = __svcres;					\
+	__syscall_return(type,__res);				\
 }
 
-#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3)\
-type name(type1 arg1, type2 arg2, type3 arg3) {		     \
-	register type1 __arg1 asm("2") = arg1;		     \
-	register type2 __arg2 asm("3") = arg2;		     \
-	register type3 __arg3 asm("4") = arg3;		     \
-	register long __svcres asm("2");		     \
-	long __res;					     \
-	__asm__ __volatile__ (				     \
-		"    .if %1 < 256\n"			     \
-		"    svc %b1\n"				     \
-		"    .else\n"				     \
-		"    la  %%r1,%1\n"			     \
-		"    svc 0\n"				     \
-		"    .endif"				     \
-		: "=d" (__svcres)			     \
-		: "i" (__NR_##name),			     \
-		  "0" (__arg1),				     \
-		  "d" (__arg2),				     \
-		  "d" (__arg3)				     \
-		: _svc_clobber );			     \
-	__res = __svcres;				     \
-	__syscall_return(type,__res);			     \
+#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3)	\
+type name(type1 arg1, type2 arg2, type3 arg3) {			\
+	register type1 __arg1 asm("2") = arg1;			\
+	register type2 __arg2 asm("3") = arg2;			\
+	register type3 __arg3 asm("4") = arg3;			\
+	register long __svcres asm("2");			\
+	long __res;						\
+	asm volatile(						\
+		"	.if	%1 < 256\n"			\
+		"	svc	%b1\n"				\
+		"	.else\n"				\
+		"	la	%%r1,%1\n"			\
+		"	svc	0\n"				\
+		"	.endif"					\
+		: "=d" (__svcres)				\
+		: "i" (__NR_##name),				\
+		  "0" (__arg1),					\
+		  "d" (__arg2),					\
+		  "d" (__arg3)					\
+		: _svc_clobber);				\
+	__res = __svcres;					\
+	__syscall_return(type,__res);				\
 }
 
-#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,\
-		  type4,name4)				     \
-type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {  \
-	register type1 __arg1 asm("2") = arg1;		     \
-	register type2 __arg2 asm("3") = arg2;		     \
-	register type3 __arg3 asm("4") = arg3;		     \
-	register type4 __arg4 asm("5") = arg4;		     \
-	register long __svcres asm("2");		     \
-	long __res;					     \
-	__asm__ __volatile__ (				     \
-		"    .if %1 < 256\n"			     \
-		"    svc %b1\n"				     \
-		"    .else\n"				     \
-		"    la  %%r1,%1\n"			     \
-		"    svc 0\n"				     \
-		"    .endif"				     \
-		: "=d" (__svcres)			     \
-		: "i" (__NR_##name),			     \
-		  "0" (__arg1),				     \
-		  "d" (__arg2),				     \
-		  "d" (__arg3),				     \
-		  "d" (__arg4)				     \
-		: _svc_clobber );			     \
-	__res = __svcres;				     \
-	__syscall_return(type,__res);			     \
+#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,	\
+		  type4,name4)					\
+type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {	\
+	register type1 __arg1 asm("2") = arg1;			\
+	register type2 __arg2 asm("3") = arg2;			\
+	register type3 __arg3 asm("4") = arg3;			\
+	register type4 __arg4 asm("5") = arg4;			\
+	register long __svcres asm("2");			\
+	long __res;						\
+	asm volatile(						\
+		"	.if	%1 < 256\n"			\
+		"	svc	%b1\n"				\
+		"	.else\n"				\
+		"	la	%%r1,%1\n"			\
+		"	svc	0\n"				\
+		"	.endif"					\
+		: "=d" (__svcres)				\
+		: "i" (__NR_##name),				\
+		  "0" (__arg1),					\
+		  "d" (__arg2),					\
+		  "d" (__arg3),					\
+		  "d" (__arg4)					\
+		: _svc_clobber);				\
+	__res = __svcres;					\
+	__syscall_return(type,__res);				\
 }
 
-#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,\
-		  type4,name4,type5,name5)		     \
-type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4,    \
-	  type5 arg5) {					     \
-	register type1 __arg1 asm("2") = arg1;		     \
-	register type2 __arg2 asm("3") = arg2;		     \
-	register type3 __arg3 asm("4") = arg3;		     \
-	register type4 __arg4 asm("5") = arg4;		     \
-	register type5 __arg5 asm("6") = arg5;		     \
-	register long __svcres asm("2");		     \
-	long __res;					     \
-	__asm__ __volatile__ (				     \
-		"    .if %1 < 256\n"			     \
-		"    svc %b1\n"				     \
-		"    .else\n"				     \
-		"    la  %%r1,%1\n"			     \
-		"    svc 0\n"				     \
-		"    .endif"				     \
-		: "=d" (__svcres)			     \
-		: "i" (__NR_##name),			     \
-		  "0" (__arg1),				     \
-		  "d" (__arg2),				     \
-		  "d" (__arg3),				     \
-		  "d" (__arg4),				     \
-		  "d" (__arg5)				     \
-		: _svc_clobber );			     \
-	__res = __svcres;				     \
-	__syscall_return(type,__res);			     \
+#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,	\
+		  type4,name4,type5,name5)			\
+type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4,	\
+	  type5 arg5) {						\
+	register type1 __arg1 asm("2") = arg1;			\
+	register type2 __arg2 asm("3") = arg2;			\
+	register type3 __arg3 asm("4") = arg3;			\
+	register type4 __arg4 asm("5") = arg4;			\
+	register type5 __arg5 asm("6") = arg5;			\
+	register long __svcres asm("2");			\
+	long __res;						\
+	asm volatile(						\
+		"	.if	%1 < 256\n"			\
+		"	svc	%b1\n"				\
+		"	.else\n"				\
+		"	la	%%r1,%1\n"			\
+		"	svc	0\n"				\
+		"	.endif"					\
+		: "=d" (__svcres)				\
+		: "i" (__NR_##name),				\
+		  "0" (__arg1),					\
+		  "d" (__arg2),					\
+		  "d" (__arg3),					\
+		  "d" (__arg4),					\
+		  "d" (__arg5)					\
+		: _svc_clobber);				\
+	__res = __svcres;					\
+	__syscall_return(type,__res);				\
 }
 
 #define __ARCH_WANT_IPC_PARSE_VERSION