Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc: (78 commits)
  mmc: MAINTAINERS: add myself as a tmio-mmc maintainer
  mmc: print debug messages for runtime PM actions
  mmc: fix runtime PM with -ENOSYS suspend case
  mmc: at91_mci: move register header from include/ to drivers/
  mmc: mxs-mmc: fix clock rate setting
  mmc: tmio: fix a deadlock
  mmc: tmio: fix a recently introduced bug in DMA code
  mmc: sh_mmcif: maximize power saving
  mmc: tmio: maximize power saving
  mmc: tmio: fix recursive spinlock, don't schedule with interrupts disabled
  mmc: Added quirks for Ricoh 1180:e823 lower base clock frequency
  mmc: omap_hsmmc: fix oops in omap_hsmmc_dma_cb()
  mmc: omap_hsmmc: refactor duplicated code
  mmc: omap_hsmmc: fix a few bugs when setting the clock divisor
  mmc: omap_hsmmc: introduce start_clock and re-use stop_clock
  mmc: omap_hsmmc: split duplicate code to calc_divisor() function
  mmc: omap_hsmmc: move hardcoded frequency constants to defines
  mmc: omap_hsmmc: correct debug report error status mnemonics
  mmc: block: fixed NULL pointer dereference
  mmc: documentation of mmc non-blocking request usage and design.
  ...
diff --git a/Documentation/mmc/00-INDEX b/Documentation/mmc/00-INDEX
index 93dd7a7..a9ba672 100644
--- a/Documentation/mmc/00-INDEX
+++ b/Documentation/mmc/00-INDEX
@@ -4,3 +4,5 @@
         - info on SD and MMC device attributes
 mmc-dev-parts.txt
         - info on SD and MMC device partitions
+mmc-async-req.txt
+        - info on mmc asynchronous requests
diff --git a/Documentation/mmc/mmc-async-req.txt b/Documentation/mmc/mmc-async-req.txt
new file mode 100644
index 0000000..ae1907b
--- /dev/null
+++ b/Documentation/mmc/mmc-async-req.txt
@@ -0,0 +1,87 @@
+Rationale
+=========
+
+How significant is the cache maintenance overhead?
+It depends. Fast eMMC and multiple cache levels with speculative cache
+pre-fetch makes the cache overhead relatively significant. If the DMA
+preparations for the next request are done in parallel with the current
+transfer, the DMA preparation overhead would not affect the MMC performance.
+The intention of non-blocking (asynchronous) MMC requests is to minimize the
+time between when an MMC request ends and another MMC request begins.
+Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and
+dma_unmap_sg are processing. Using non-blocking MMC requests makes it
+possible to prepare the caches for next job in parallel with an active
+MMC request.
+
+MMC block driver
+================
+
+The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking.
+The increase in throughput is proportional to the time it takes to
+prepare (major part of preparations are dma_map_sg() and dma_unmap_sg())
+a request and how fast the memory is. The faster the MMC/SD is the
+more significant the prepare request time becomes. Roughly the expected
+performance gain is 5% for large writes and 10% on large reads on a L2 cache
+platform. In power save mode, when clocks run on a lower frequency, the DMA
+preparation may cost even more. As long as these slower preparations are run
+in parallel with the transfer performance won't be affected.
+
+Details on measurements from IOZone and mmc_test
+================================================
+
+https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req
+
+MMC core API extension
+======================
+
+There is one new public function mmc_start_req().
+It starts a new MMC command request for a host. The function isn't
+truly non-blocking. If there is an ongoing async request it waits
+for completion of that request and starts the new one and returns. It
+doesn't wait for the new request to complete. If there is no ongoing
+request it starts the new request and returns immediately.
+
+MMC host extensions
+===================
+
+There are two optional members in the mmc_host_ops -- pre_req() and
+post_req() -- that the host driver may implement in order to move work
+to before and after the actual mmc_host_ops.request() function is called.
+In the DMA case pre_req() may do dma_map_sg() and prepare the DMA
+descriptor, and post_req() runs the dma_unmap_sg().
+
+Optimize for the first request
+==============================
+
+The first request in a series of requests can't be prepared in parallel
+with the previous transfer, since there is no previous request.
+The argument is_first_req in pre_req() indicates that there is no previous
+request. The host driver may optimize for this scenario to minimize
+the performance loss. A way to optimize for this is to split the current
+request in two chunks, prepare the first chunk and start the request,
+and finally prepare the second chunk and start the transfer.
+
+Pseudocode to handle is_first_req scenario with minimal prepare overhead:
+
+if (is_first_req && req->size > threshold)
+   /* start MMC transfer for the complete transfer size */
+   mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE);
+
+   /*
+    * Begin to prepare DMA while cmd is being processed by MMC.
+    * The first chunk of the request should take the same time
+    * to prepare as the "MMC process command time".
+    * If prepare time exceeds MMC cmd time
+    * the transfer is delayed, guesstimate max 4k as first chunk size.
+    */
+    prepare_1st_chunk_for_dma(req);
+    /* flush pending desc to the DMAC (dmaengine.h) */
+    dma_issue_pending(req->dma_desc);
+
+    prepare_2nd_chunk_for_dma(req);
+    /*
+     * The second issue_pending should be called before MMC runs out
+     * of the first chunk. If the MMC runs out of the first data chunk
+     * before this call, the transfer is delayed.
+     */
+    dma_issue_pending(req->dma_desc);
diff --git a/MAINTAINERS b/MAINTAINERS
index 72b979d..789fed6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4585,9 +4585,8 @@
 F:	drivers/mmc/host/omap.c
 
 OMAP HS MMC SUPPORT
-M:	Madhusudhan Chikkature <madhu.cr@ti.com>
 L:	linux-omap@vger.kernel.org
-S:	Maintained
+S:	Orphan
 F:	drivers/mmc/host/omap_hsmmc.c
 
 OMAP RANDOM NUMBER GENERATOR SUPPORT
@@ -6243,9 +6242,14 @@
 F:	include/linux/toshiba.h
 
 TMIO MMC DRIVER
+M:	Guennadi Liakhovetski <g.liakhovetski@gmx.de>
 M:	Ian Molton <ian@mnementh.co.uk>
+L:	linux-mmc@vger.kernel.org
 S:	Maintained
-F:	drivers/mmc/host/tmio_mmc.*
+F:	drivers/mmc/host/tmio_mmc*
+F:	drivers/mmc/host/sh_mobile_sdhi.c
+F:	include/linux/mmc/tmio.h
+F:	include/linux/mmc/sh_mobile_sdhi.h
 
 TMPFS (SHMEM FILESYSTEM)
 M:	Hugh Dickins <hughd@google.com>
diff --git a/arch/arm/configs/mmp2_defconfig b/arch/arm/configs/mmp2_defconfig
index 47ad3b1..5a58452 100644
--- a/arch/arm/configs/mmp2_defconfig
+++ b/arch/arm/configs/mmp2_defconfig
@@ -8,6 +8,7 @@
 CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_ARCH_MMP=y
+CONFIG_MACH_BROWNSTONE=y
 CONFIG_MACH_FLINT=y
 CONFIG_MACH_MARVELL_JASPER=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -63,10 +64,16 @@
 # CONFIG_USB_SUPPORT is not set
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_MAX8925=y
+CONFIG_MMC=y
 # CONFIG_DNOTIFY is not set
 CONFIG_INOTIFY=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_FAT_DEFAULT_CODEPAGE=437
 CONFIG_JFFS2_FS=y
 CONFIG_CRAMFS=y
 CONFIG_NFS_FS=y
@@ -81,7 +88,7 @@
 # CONFIG_DEBUG_PREEMPT is not set
 CONFIG_DEBUG_INFO=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_DYNAMIC_DEBUG=y
+# CONFIG_DYNAMIC_DEBUG is not set
 CONFIG_DEBUG_USER=y
 CONFIG_DEBUG_ERRORS=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/arm/mach-mmp/brownstone.c b/arch/arm/mach-mmp/brownstone.c
index 7bb78fd..c79162a 100644
--- a/arch/arm/mach-mmp/brownstone.c
+++ b/arch/arm/mach-mmp/brownstone.c
@@ -177,9 +177,16 @@
 };
 
 static struct sdhci_pxa_platdata mmp2_sdh_platdata_mmc0 = {
-	.max_speed	= 25000000,
+	.clk_delay_cycles = 0x1f,
 };
 
+static struct sdhci_pxa_platdata mmp2_sdh_platdata_mmc2 = {
+	.clk_delay_cycles = 0x1f,
+	.flags = PXA_FLAG_CARD_PERMANENT
+		| PXA_FLAG_SD_8_BIT_CAPABLE_SLOT,
+};
+
+
 static void __init brownstone_init(void)
 {
 	mfp_config(ARRAY_AND_SIZE(brownstone_pin_config));
@@ -189,6 +196,7 @@
 	mmp2_add_uart(3);
 	mmp2_add_twsi(1, NULL, ARRAY_AND_SIZE(brownstone_twsi1_info));
 	mmp2_add_sdhost(0, &mmp2_sdh_platdata_mmc0); /* SD/MMC */
+	mmp2_add_sdhost(2, &mmp2_sdh_platdata_mmc2); /* eMMC */
 
 	/* enable 5v regulator */
 	platform_device_register(&brownstone_v_5vp_device);
diff --git a/arch/arm/mach-mmp/include/mach/mmp2.h b/arch/arm/mach-mmp/include/mach/mmp2.h
index 2cbf6df..de7b888 100644
--- a/arch/arm/mach-mmp/include/mach/mmp2.h
+++ b/arch/arm/mach-mmp/include/mach/mmp2.h
@@ -1,7 +1,7 @@
 #ifndef __ASM_MACH_MMP2_H
 #define __ASM_MACH_MMP2_H
 
-#include <plat/sdhci.h>
+#include <linux/platform_data/pxa_sdhci.h>
 
 struct sys_timer;
 
diff --git a/arch/arm/mach-mmp/jasper.c b/arch/arm/mach-mmp/jasper.c
index 24172a0..5d6421d 100644
--- a/arch/arm/mach-mmp/jasper.c
+++ b/arch/arm/mach-mmp/jasper.c
@@ -154,7 +154,7 @@
 };
 
 static struct sdhci_pxa_platdata mmp2_sdh_platdata_mmc0 = {
-	.max_speed	= 25000000,
+	.clk_delay_cycles = 0x1f,
 };
 
 static void __init jasper_init(void)
diff --git a/arch/arm/mach-mmp/mmp2.c b/arch/arm/mach-mmp/mmp2.c
index 8e6c3ac..079c188 100644
--- a/arch/arm/mach-mmp/mmp2.c
+++ b/arch/arm/mach-mmp/mmp2.c
@@ -168,10 +168,10 @@
 	INIT_CLKREG(&clk_twsi5, "pxa2xx-i2c.4", NULL),
 	INIT_CLKREG(&clk_twsi6, "pxa2xx-i2c.5", NULL),
 	INIT_CLKREG(&clk_nand, "pxa3xx-nand", NULL),
-	INIT_CLKREG(&clk_sdh0, "sdhci-pxa.0", "PXA-SDHCLK"),
-	INIT_CLKREG(&clk_sdh1, "sdhci-pxa.1", "PXA-SDHCLK"),
-	INIT_CLKREG(&clk_sdh2, "sdhci-pxa.2", "PXA-SDHCLK"),
-	INIT_CLKREG(&clk_sdh3, "sdhci-pxa.3", "PXA-SDHCLK"),
+	INIT_CLKREG(&clk_sdh0, "sdhci-pxav3.0", "PXA-SDHCLK"),
+	INIT_CLKREG(&clk_sdh1, "sdhci-pxav3.1", "PXA-SDHCLK"),
+	INIT_CLKREG(&clk_sdh2, "sdhci-pxav3.2", "PXA-SDHCLK"),
+	INIT_CLKREG(&clk_sdh3, "sdhci-pxav3.3", "PXA-SDHCLK"),
 };
 
 static int __init mmp2_init(void)
@@ -222,8 +222,8 @@
 MMP2_DEVICE(twsi5, "pxa2xx-i2c", 4, TWSI5, 0xd4033800, 0x70);
 MMP2_DEVICE(twsi6, "pxa2xx-i2c", 5, TWSI6, 0xd4034000, 0x70);
 MMP2_DEVICE(nand, "pxa3xx-nand", -1, NAND, 0xd4283000, 0x100, 28, 29);
-MMP2_DEVICE(sdh0, "sdhci-pxa", 0, MMC, 0xd4280000, 0x120);
-MMP2_DEVICE(sdh1, "sdhci-pxa", 1, MMC2, 0xd4280800, 0x120);
-MMP2_DEVICE(sdh2, "sdhci-pxa", 2, MMC3, 0xd4281000, 0x120);
-MMP2_DEVICE(sdh3, "sdhci-pxa", 3, MMC4, 0xd4281800, 0x120);
+MMP2_DEVICE(sdh0, "sdhci-pxav3", 0, MMC, 0xd4280000, 0x120);
+MMP2_DEVICE(sdh1, "sdhci-pxav3", 1, MMC2, 0xd4280800, 0x120);
+MMP2_DEVICE(sdh2, "sdhci-pxav3", 2, MMC3, 0xd4281000, 0x120);
+MMP2_DEVICE(sdh3, "sdhci-pxav3", 3, MMC4, 0xd4281800, 0x120);
 
diff --git a/arch/arm/plat-pxa/include/plat/sdhci.h b/arch/arm/plat-pxa/include/plat/sdhci.h
deleted file mode 100644
index 1ab332e..0000000
--- a/arch/arm/plat-pxa/include/plat/sdhci.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* linux/arch/arm/plat-pxa/include/plat/sdhci.h
- *
- * Copyright 2010 Marvell
- *	Zhangfei Gao <zhangfei.gao@marvell.com>
- *
- * PXA Platform - SDHCI platform data definitions
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __PLAT_PXA_SDHCI_H
-#define __PLAT_PXA_SDHCI_H
-
-/* pxa specific flag */
-/* Require clock free running */
-#define PXA_FLAG_DISABLE_CLOCK_GATING (1<<0)
-
-/* Board design supports 8-bit data on SD/SDIO BUS */
-#define PXA_FLAG_SD_8_BIT_CAPABLE_SLOT (1<<2)
-
-/*
- * struct pxa_sdhci_platdata() - Platform device data for PXA SDHCI
- * @max_speed: the maximum speed supported
- * @quirks: quirks of specific device
- * @flags: flags for platform requirement
- */
-struct sdhci_pxa_platdata {
-	unsigned int	max_speed;
-	unsigned int	quirks;
-	unsigned int	flags;
-};
-
-#endif /* __PLAT_PXA_SDHCI_H */
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index f85e422..1ff5486 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -106,6 +106,16 @@
 
 static DEFINE_MUTEX(open_lock);
 
+enum mmc_blk_status {
+	MMC_BLK_SUCCESS = 0,
+	MMC_BLK_PARTIAL,
+	MMC_BLK_RETRY,
+	MMC_BLK_RETRY_SINGLE,
+	MMC_BLK_DATA_ERR,
+	MMC_BLK_CMD_ERR,
+	MMC_BLK_ABORT,
+};
+
 module_param(perdev_minors, int, 0444);
 MODULE_PARM_DESC(perdev_minors, "Minors numbers to allocate per device");
 
@@ -427,14 +437,6 @@
 #endif
 };
 
-struct mmc_blk_request {
-	struct mmc_request	mrq;
-	struct mmc_command	sbc;
-	struct mmc_command	cmd;
-	struct mmc_command	stop;
-	struct mmc_data		data;
-};
-
 static inline int mmc_blk_part_switch(struct mmc_card *card,
 				      struct mmc_blk_data *md)
 {
@@ -525,7 +527,20 @@
 	return result;
 }
 
-static u32 get_card_status(struct mmc_card *card, struct request *req)
+static int send_stop(struct mmc_card *card, u32 *status)
+{
+	struct mmc_command cmd = {0};
+	int err;
+
+	cmd.opcode = MMC_STOP_TRANSMISSION;
+	cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
+	err = mmc_wait_for_cmd(card->host, &cmd, 5);
+	if (err == 0)
+		*status = cmd.resp[0];
+	return err;
+}
+
+static int get_card_status(struct mmc_card *card, u32 *status, int retries)
 {
 	struct mmc_command cmd = {0};
 	int err;
@@ -534,11 +549,141 @@
 	if (!mmc_host_is_spi(card->host))
 		cmd.arg = card->rca << 16;
 	cmd.flags = MMC_RSP_SPI_R2 | MMC_RSP_R1 | MMC_CMD_AC;
-	err = mmc_wait_for_cmd(card->host, &cmd, 0);
+	err = mmc_wait_for_cmd(card->host, &cmd, retries);
+	if (err == 0)
+		*status = cmd.resp[0];
+	return err;
+}
+
+#define ERR_RETRY	2
+#define ERR_ABORT	1
+#define ERR_CONTINUE	0
+
+static int mmc_blk_cmd_error(struct request *req, const char *name, int error,
+	bool status_valid, u32 status)
+{
+	switch (error) {
+	case -EILSEQ:
+		/* response crc error, retry the r/w cmd */
+		pr_err("%s: %s sending %s command, card status %#x\n",
+			req->rq_disk->disk_name, "response CRC error",
+			name, status);
+		return ERR_RETRY;
+
+	case -ETIMEDOUT:
+		pr_err("%s: %s sending %s command, card status %#x\n",
+			req->rq_disk->disk_name, "timed out", name, status);
+
+		/* If the status cmd initially failed, retry the r/w cmd */
+		if (!status_valid)
+			return ERR_RETRY;
+
+		/*
+		 * If it was a r/w cmd crc error, or illegal command
+		 * (eg, issued in wrong state) then retry - we should
+		 * have corrected the state problem above.
+		 */
+		if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND))
+			return ERR_RETRY;
+
+		/* Otherwise abort the command */
+		return ERR_ABORT;
+
+	default:
+		/* We don't understand the error code the driver gave us */
+		pr_err("%s: unknown error %d sending read/write command, card status %#x\n",
+		       req->rq_disk->disk_name, error, status);
+		return ERR_ABORT;
+	}
+}
+
+/*
+ * Initial r/w and stop cmd error recovery.
+ * We don't know whether the card received the r/w cmd or not, so try to
+ * restore things back to a sane state.  Essentially, we do this as follows:
+ * - Obtain card status.  If the first attempt to obtain card status fails,
+ *   the status word will reflect the failed status cmd, not the failed
+ *   r/w cmd.  If we fail to obtain card status, it suggests we can no
+ *   longer communicate with the card.
+ * - Check the card state.  If the card received the cmd but there was a
+ *   transient problem with the response, it might still be in a data transfer
+ *   mode.  Try to send it a stop command.  If this fails, we can't recover.
+ * - If the r/w cmd failed due to a response CRC error, it was probably
+ *   transient, so retry the cmd.
+ * - If the r/w cmd timed out, but we didn't get the r/w cmd status, retry.
+ * - If the r/w cmd timed out, and the r/w cmd failed due to CRC error or
+ *   illegal cmd, retry.
+ * Otherwise we don't understand what happened, so abort.
+ */
+static int mmc_blk_cmd_recovery(struct mmc_card *card, struct request *req,
+	struct mmc_blk_request *brq)
+{
+	bool prev_cmd_status_valid = true;
+	u32 status, stop_status = 0;
+	int err, retry;
+
+	/*
+	 * Try to get card status which indicates both the card state
+	 * and why there was no response.  If the first attempt fails,
+	 * we can't be sure the returned status is for the r/w command.
+	 */
+	for (retry = 2; retry >= 0; retry--) {
+		err = get_card_status(card, &status, 0);
+		if (!err)
+			break;
+
+		prev_cmd_status_valid = false;
+		pr_err("%s: error %d sending status command, %sing\n",
+		       req->rq_disk->disk_name, err, retry ? "retry" : "abort");
+	}
+
+	/* We couldn't get a response from the card.  Give up. */
 	if (err)
-		printk(KERN_ERR "%s: error %d sending status command",
-		       req->rq_disk->disk_name, err);
-	return cmd.resp[0];
+		return ERR_ABORT;
+
+	/*
+	 * Check the current card state.  If it is in some data transfer
+	 * mode, tell it to stop (and hopefully transition back to TRAN.)
+	 */
+	if (R1_CURRENT_STATE(status) == R1_STATE_DATA ||
+	    R1_CURRENT_STATE(status) == R1_STATE_RCV) {
+		err = send_stop(card, &stop_status);
+		if (err)
+			pr_err("%s: error %d sending stop command\n",
+			       req->rq_disk->disk_name, err);
+
+		/*
+		 * If the stop cmd also timed out, the card is probably
+		 * not present, so abort.  Other errors are bad news too.
+		 */
+		if (err)
+			return ERR_ABORT;
+	}
+
+	/* Check for set block count errors */
+	if (brq->sbc.error)
+		return mmc_blk_cmd_error(req, "SET_BLOCK_COUNT", brq->sbc.error,
+				prev_cmd_status_valid, status);
+
+	/* Check for r/w command errors */
+	if (brq->cmd.error)
+		return mmc_blk_cmd_error(req, "r/w cmd", brq->cmd.error,
+				prev_cmd_status_valid, status);
+
+	/* Now for stop errors.  These aren't fatal to the transfer. */
+	pr_err("%s: error %d sending stop command, original cmd response %#x, card status %#x\n",
+	       req->rq_disk->disk_name, brq->stop.error,
+	       brq->cmd.resp[0], status);
+
+	/*
+	 * Subsitute in our own stop status as this will give the error
+	 * state which happened during the execution of the r/w command.
+	 */
+	if (stop_status) {
+		brq->stop.resp[0] = stop_status;
+		brq->stop.error = 0;
+	}
+	return ERR_CONTINUE;
 }
 
 static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
@@ -669,12 +814,114 @@
 	}
 }
 
-static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
+#define CMD_ERRORS							\
+	(R1_OUT_OF_RANGE |	/* Command argument out of range */	\
+	 R1_ADDRESS_ERROR |	/* Misaligned address */		\
+	 R1_BLOCK_LEN_ERROR |	/* Transferred block length incorrect */\
+	 R1_WP_VIOLATION |	/* Tried to write to protected block */	\
+	 R1_CC_ERROR |		/* Card controller error */		\
+	 R1_ERROR)		/* General/unknown error */
+
+static int mmc_blk_err_check(struct mmc_card *card,
+			     struct mmc_async_req *areq)
 {
+	enum mmc_blk_status ret = MMC_BLK_SUCCESS;
+	struct mmc_queue_req *mq_mrq = container_of(areq, struct mmc_queue_req,
+						    mmc_active);
+	struct mmc_blk_request *brq = &mq_mrq->brq;
+	struct request *req = mq_mrq->req;
+
+	/*
+	 * sbc.error indicates a problem with the set block count
+	 * command.  No data will have been transferred.
+	 *
+	 * cmd.error indicates a problem with the r/w command.  No
+	 * data will have been transferred.
+	 *
+	 * stop.error indicates a problem with the stop command.  Data
+	 * may have been transferred, or may still be transferring.
+	 */
+	if (brq->sbc.error || brq->cmd.error || brq->stop.error) {
+		switch (mmc_blk_cmd_recovery(card, req, brq)) {
+		case ERR_RETRY:
+			return MMC_BLK_RETRY;
+		case ERR_ABORT:
+			return MMC_BLK_ABORT;
+		case ERR_CONTINUE:
+			break;
+		}
+	}
+
+	/*
+	 * Check for errors relating to the execution of the
+	 * initial command - such as address errors.  No data
+	 * has been transferred.
+	 */
+	if (brq->cmd.resp[0] & CMD_ERRORS) {
+		pr_err("%s: r/w command failed, status = %#x\n",
+		       req->rq_disk->disk_name, brq->cmd.resp[0]);
+		return MMC_BLK_ABORT;
+	}
+
+	/*
+	 * Everything else is either success, or a data error of some
+	 * kind.  If it was a write, we may have transitioned to
+	 * program mode, which we have to wait for it to complete.
+	 */
+	if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
+		u32 status;
+		do {
+			int err = get_card_status(card, &status, 5);
+			if (err) {
+				printk(KERN_ERR "%s: error %d requesting status\n",
+				       req->rq_disk->disk_name, err);
+				return MMC_BLK_CMD_ERR;
+			}
+			/*
+			 * Some cards mishandle the status bits,
+			 * so make sure to check both the busy
+			 * indication and the card state.
+			 */
+		} while (!(status & R1_READY_FOR_DATA) ||
+			 (R1_CURRENT_STATE(status) == R1_STATE_PRG));
+	}
+
+	if (brq->data.error) {
+		pr_err("%s: error %d transferring data, sector %u, nr %u, cmd response %#x, card status %#x\n",
+		       req->rq_disk->disk_name, brq->data.error,
+		       (unsigned)blk_rq_pos(req),
+		       (unsigned)blk_rq_sectors(req),
+		       brq->cmd.resp[0], brq->stop.resp[0]);
+
+		if (rq_data_dir(req) == READ) {
+			if (brq->data.blocks > 1) {
+				/* Redo read one sector at a time */
+				pr_warning("%s: retrying using single block read\n",
+					   req->rq_disk->disk_name);
+				return MMC_BLK_RETRY_SINGLE;
+			}
+			return MMC_BLK_DATA_ERR;
+		} else {
+			return MMC_BLK_CMD_ERR;
+		}
+	}
+
+	if (ret == MMC_BLK_SUCCESS &&
+	    blk_rq_bytes(req) != brq->data.bytes_xfered)
+		ret = MMC_BLK_PARTIAL;
+
+	return ret;
+}
+
+static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
+			       struct mmc_card *card,
+			       int disable_multi,
+			       struct mmc_queue *mq)
+{
+	u32 readcmd, writecmd;
+	struct mmc_blk_request *brq = &mqrq->brq;
+	struct request *req = mqrq->req;
 	struct mmc_blk_data *md = mq->data;
-	struct mmc_card *card = md->queue.card;
-	struct mmc_blk_request brq;
-	int ret = 1, disable_multi = 0;
 
 	/*
 	 * Reliable writes are used to implement Forced Unit Access and
@@ -685,224 +932,206 @@
 		(rq_data_dir(req) == WRITE) &&
 		(md->flags & MMC_BLK_REL_WR);
 
+	memset(brq, 0, sizeof(struct mmc_blk_request));
+	brq->mrq.cmd = &brq->cmd;
+	brq->mrq.data = &brq->data;
+
+	brq->cmd.arg = blk_rq_pos(req);
+	if (!mmc_card_blockaddr(card))
+		brq->cmd.arg <<= 9;
+	brq->cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
+	brq->data.blksz = 512;
+	brq->stop.opcode = MMC_STOP_TRANSMISSION;
+	brq->stop.arg = 0;
+	brq->stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
+	brq->data.blocks = blk_rq_sectors(req);
+
+	/*
+	 * The block layer doesn't support all sector count
+	 * restrictions, so we need to be prepared for too big
+	 * requests.
+	 */
+	if (brq->data.blocks > card->host->max_blk_count)
+		brq->data.blocks = card->host->max_blk_count;
+
+	/*
+	 * After a read error, we redo the request one sector at a time
+	 * in order to accurately determine which sectors can be read
+	 * successfully.
+	 */
+	if (disable_multi && brq->data.blocks > 1)
+		brq->data.blocks = 1;
+
+	if (brq->data.blocks > 1 || do_rel_wr) {
+		/* SPI multiblock writes terminate using a special
+		 * token, not a STOP_TRANSMISSION request.
+		 */
+		if (!mmc_host_is_spi(card->host) ||
+		    rq_data_dir(req) == READ)
+			brq->mrq.stop = &brq->stop;
+		readcmd = MMC_READ_MULTIPLE_BLOCK;
+		writecmd = MMC_WRITE_MULTIPLE_BLOCK;
+	} else {
+		brq->mrq.stop = NULL;
+		readcmd = MMC_READ_SINGLE_BLOCK;
+		writecmd = MMC_WRITE_BLOCK;
+	}
+	if (rq_data_dir(req) == READ) {
+		brq->cmd.opcode = readcmd;
+		brq->data.flags |= MMC_DATA_READ;
+	} else {
+		brq->cmd.opcode = writecmd;
+		brq->data.flags |= MMC_DATA_WRITE;
+	}
+
+	if (do_rel_wr)
+		mmc_apply_rel_rw(brq, card, req);
+
+	/*
+	 * Pre-defined multi-block transfers are preferable to
+	 * open ended-ones (and necessary for reliable writes).
+	 * However, it is not sufficient to just send CMD23,
+	 * and avoid the final CMD12, as on an error condition
+	 * CMD12 (stop) needs to be sent anyway. This, coupled
+	 * with Auto-CMD23 enhancements provided by some
+	 * hosts, means that the complexity of dealing
+	 * with this is best left to the host. If CMD23 is
+	 * supported by card and host, we'll fill sbc in and let
+	 * the host deal with handling it correctly. This means
+	 * that for hosts that don't expose MMC_CAP_CMD23, no
+	 * change of behavior will be observed.
+	 *
+	 * N.B: Some MMC cards experience perf degradation.
+	 * We'll avoid using CMD23-bounded multiblock writes for
+	 * these, while retaining features like reliable writes.
+	 */
+
+	if ((md->flags & MMC_BLK_CMD23) &&
+	    mmc_op_multi(brq->cmd.opcode) &&
+	    (do_rel_wr || !(card->quirks & MMC_QUIRK_BLK_NO_CMD23))) {
+		brq->sbc.opcode = MMC_SET_BLOCK_COUNT;
+		brq->sbc.arg = brq->data.blocks |
+			(do_rel_wr ? (1 << 31) : 0);
+		brq->sbc.flags = MMC_RSP_R1 | MMC_CMD_AC;
+		brq->mrq.sbc = &brq->sbc;
+	}
+
+	mmc_set_data_timeout(&brq->data, card);
+
+	brq->data.sg = mqrq->sg;
+	brq->data.sg_len = mmc_queue_map_sg(mq, mqrq);
+
+	/*
+	 * Adjust the sg list so it is the same size as the
+	 * request.
+	 */
+	if (brq->data.blocks != blk_rq_sectors(req)) {
+		int i, data_size = brq->data.blocks << 9;
+		struct scatterlist *sg;
+
+		for_each_sg(brq->data.sg, sg, brq->data.sg_len, i) {
+			data_size -= sg->length;
+			if (data_size <= 0) {
+				sg->length += data_size;
+				i++;
+				break;
+			}
+		}
+		brq->data.sg_len = i;
+	}
+
+	mqrq->mmc_active.mrq = &brq->mrq;
+	mqrq->mmc_active.err_check = mmc_blk_err_check;
+
+	mmc_queue_bounce_pre(mqrq);
+}
+
+static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
+{
+	struct mmc_blk_data *md = mq->data;
+	struct mmc_card *card = md->queue.card;
+	struct mmc_blk_request *brq = &mq->mqrq_cur->brq;
+	int ret = 1, disable_multi = 0, retry = 0;
+	enum mmc_blk_status status;
+	struct mmc_queue_req *mq_rq;
+	struct request *req;
+	struct mmc_async_req *areq;
+
+	if (!rqc && !mq->mqrq_prev->req)
+		return 0;
+
 	do {
-		struct mmc_command cmd = {0};
-		u32 readcmd, writecmd, status = 0;
+		if (rqc) {
+			mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
+			areq = &mq->mqrq_cur->mmc_active;
+		} else
+			areq = NULL;
+		areq = mmc_start_req(card->host, areq, (int *) &status);
+		if (!areq)
+			return 0;
 
-		memset(&brq, 0, sizeof(struct mmc_blk_request));
-		brq.mrq.cmd = &brq.cmd;
-		brq.mrq.data = &brq.data;
+		mq_rq = container_of(areq, struct mmc_queue_req, mmc_active);
+		brq = &mq_rq->brq;
+		req = mq_rq->req;
+		mmc_queue_bounce_post(mq_rq);
 
-		brq.cmd.arg = blk_rq_pos(req);
-		if (!mmc_card_blockaddr(card))
-			brq.cmd.arg <<= 9;
-		brq.cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
-		brq.data.blksz = 512;
-		brq.stop.opcode = MMC_STOP_TRANSMISSION;
-		brq.stop.arg = 0;
-		brq.stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
-		brq.data.blocks = blk_rq_sectors(req);
-
-		/*
-		 * The block layer doesn't support all sector count
-		 * restrictions, so we need to be prepared for too big
-		 * requests.
-		 */
-		if (brq.data.blocks > card->host->max_blk_count)
-			brq.data.blocks = card->host->max_blk_count;
-
-		/*
-		 * After a read error, we redo the request one sector at a time
-		 * in order to accurately determine which sectors can be read
-		 * successfully.
-		 */
-		if (disable_multi && brq.data.blocks > 1)
-			brq.data.blocks = 1;
-
-		if (brq.data.blocks > 1 || do_rel_wr) {
-			/* SPI multiblock writes terminate using a special
-			 * token, not a STOP_TRANSMISSION request.
+		switch (status) {
+		case MMC_BLK_SUCCESS:
+		case MMC_BLK_PARTIAL:
+			/*
+			 * A block was successfully transferred.
 			 */
-			if (!mmc_host_is_spi(card->host) ||
-			    rq_data_dir(req) == READ)
-				brq.mrq.stop = &brq.stop;
-			readcmd = MMC_READ_MULTIPLE_BLOCK;
-			writecmd = MMC_WRITE_MULTIPLE_BLOCK;
-		} else {
-			brq.mrq.stop = NULL;
-			readcmd = MMC_READ_SINGLE_BLOCK;
-			writecmd = MMC_WRITE_BLOCK;
-		}
-		if (rq_data_dir(req) == READ) {
-			brq.cmd.opcode = readcmd;
-			brq.data.flags |= MMC_DATA_READ;
-		} else {
-			brq.cmd.opcode = writecmd;
-			brq.data.flags |= MMC_DATA_WRITE;
-		}
-
-		if (do_rel_wr)
-			mmc_apply_rel_rw(&brq, card, req);
-
-		/*
-		 * Pre-defined multi-block transfers are preferable to
-		 * open ended-ones (and necessary for reliable writes).
-		 * However, it is not sufficient to just send CMD23,
-		 * and avoid the final CMD12, as on an error condition
-		 * CMD12 (stop) needs to be sent anyway. This, coupled
-		 * with Auto-CMD23 enhancements provided by some
-		 * hosts, means that the complexity of dealing
-		 * with this is best left to the host. If CMD23 is
-		 * supported by card and host, we'll fill sbc in and let
-		 * the host deal with handling it correctly. This means
-		 * that for hosts that don't expose MMC_CAP_CMD23, no
-		 * change of behavior will be observed.
-		 *
-		 * N.B: Some MMC cards experience perf degradation.
-		 * We'll avoid using CMD23-bounded multiblock writes for
-		 * these, while retaining features like reliable writes.
-		 */
-
-		if ((md->flags & MMC_BLK_CMD23) &&
-		    mmc_op_multi(brq.cmd.opcode) &&
-		    (do_rel_wr || !(card->quirks & MMC_QUIRK_BLK_NO_CMD23))) {
-			brq.sbc.opcode = MMC_SET_BLOCK_COUNT;
-			brq.sbc.arg = brq.data.blocks |
-				(do_rel_wr ? (1 << 31) : 0);
-			brq.sbc.flags = MMC_RSP_R1 | MMC_CMD_AC;
-			brq.mrq.sbc = &brq.sbc;
-		}
-
-		mmc_set_data_timeout(&brq.data, card);
-
-		brq.data.sg = mq->sg;
-		brq.data.sg_len = mmc_queue_map_sg(mq);
-
-		/*
-		 * Adjust the sg list so it is the same size as the
-		 * request.
-		 */
-		if (brq.data.blocks != blk_rq_sectors(req)) {
-			int i, data_size = brq.data.blocks << 9;
-			struct scatterlist *sg;
-
-			for_each_sg(brq.data.sg, sg, brq.data.sg_len, i) {
-				data_size -= sg->length;
-				if (data_size <= 0) {
-					sg->length += data_size;
-					i++;
-					break;
-				}
-			}
-			brq.data.sg_len = i;
-		}
-
-		mmc_queue_bounce_pre(mq);
-
-		mmc_wait_for_req(card->host, &brq.mrq);
-
-		mmc_queue_bounce_post(mq);
-
-		/*
-		 * Check for errors here, but don't jump to cmd_err
-		 * until later as we need to wait for the card to leave
-		 * programming mode even when things go wrong.
-		 */
-		if (brq.sbc.error || brq.cmd.error ||
-		    brq.data.error || brq.stop.error) {
-			if (brq.data.blocks > 1 && rq_data_dir(req) == READ) {
-				/* Redo read one sector at a time */
-				printk(KERN_WARNING "%s: retrying using single "
-				       "block read\n", req->rq_disk->disk_name);
-				disable_multi = 1;
-				continue;
-			}
-			status = get_card_status(card, req);
-		}
-
-		if (brq.sbc.error) {
-			printk(KERN_ERR "%s: error %d sending SET_BLOCK_COUNT "
-			       "command, response %#x, card status %#x\n",
-			       req->rq_disk->disk_name, brq.sbc.error,
-			       brq.sbc.resp[0], status);
-		}
-
-		if (brq.cmd.error) {
-			printk(KERN_ERR "%s: error %d sending read/write "
-			       "command, response %#x, card status %#x\n",
-			       req->rq_disk->disk_name, brq.cmd.error,
-			       brq.cmd.resp[0], status);
-		}
-
-		if (brq.data.error) {
-			if (brq.data.error == -ETIMEDOUT && brq.mrq.stop)
-				/* 'Stop' response contains card status */
-				status = brq.mrq.stop->resp[0];
-			printk(KERN_ERR "%s: error %d transferring data,"
-			       " sector %u, nr %u, card status %#x\n",
-			       req->rq_disk->disk_name, brq.data.error,
-			       (unsigned)blk_rq_pos(req),
-			       (unsigned)blk_rq_sectors(req), status);
-		}
-
-		if (brq.stop.error) {
-			printk(KERN_ERR "%s: error %d sending stop command, "
-			       "response %#x, card status %#x\n",
-			       req->rq_disk->disk_name, brq.stop.error,
-			       brq.stop.resp[0], status);
-		}
-
-		if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
-			do {
-				int err;
-
-				cmd.opcode = MMC_SEND_STATUS;
-				cmd.arg = card->rca << 16;
-				cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
-				err = mmc_wait_for_cmd(card->host, &cmd, 5);
-				if (err) {
-					printk(KERN_ERR "%s: error %d requesting status\n",
-					       req->rq_disk->disk_name, err);
-					goto cmd_err;
-				}
+			spin_lock_irq(&md->lock);
+			ret = __blk_end_request(req, 0,
+						brq->data.bytes_xfered);
+			spin_unlock_irq(&md->lock);
+			if (status == MMC_BLK_SUCCESS && ret) {
 				/*
-				 * Some cards mishandle the status bits,
-				 * so make sure to check both the busy
-				 * indication and the card state.
+				 * The blk_end_request has returned non zero
+				 * even though all data is transfered and no
+				 * erros returned by host.
+				 * If this happen it's a bug.
 				 */
-			} while (!(cmd.resp[0] & R1_READY_FOR_DATA) ||
-				(R1_CURRENT_STATE(cmd.resp[0]) == 7));
-
-#if 0
-			if (cmd.resp[0] & ~0x00000900)
-				printk(KERN_ERR "%s: status = %08x\n",
-				       req->rq_disk->disk_name, cmd.resp[0]);
-			if (mmc_decode_status(cmd.resp))
-				goto cmd_err;
-#endif
-		}
-
-		if (brq.cmd.error || brq.stop.error || brq.data.error) {
-			if (rq_data_dir(req) == READ) {
-				/*
-				 * After an error, we redo I/O one sector at a
-				 * time, so we only reach here after trying to
-				 * read a single sector.
-				 */
-				spin_lock_irq(&md->lock);
-				ret = __blk_end_request(req, -EIO, brq.data.blksz);
-				spin_unlock_irq(&md->lock);
-				continue;
+				printk(KERN_ERR "%s BUG rq_tot %d d_xfer %d\n",
+				       __func__, blk_rq_bytes(req),
+				       brq->data.bytes_xfered);
+				rqc = NULL;
+				goto cmd_abort;
 			}
+			break;
+		case MMC_BLK_CMD_ERR:
 			goto cmd_err;
+		case MMC_BLK_RETRY_SINGLE:
+			disable_multi = 1;
+			break;
+		case MMC_BLK_RETRY:
+			if (retry++ < 5)
+				break;
+		case MMC_BLK_ABORT:
+			goto cmd_abort;
+		case MMC_BLK_DATA_ERR:
+			/*
+			 * After an error, we redo I/O one sector at a
+			 * time, so we only reach here after trying to
+			 * read a single sector.
+			 */
+			spin_lock_irq(&md->lock);
+			ret = __blk_end_request(req, -EIO,
+						brq->data.blksz);
+			spin_unlock_irq(&md->lock);
+			if (!ret)
+				goto start_new_req;
+			break;
 		}
 
-		/*
-		 * A block was successfully transferred.
-		 */
-		spin_lock_irq(&md->lock);
-		ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
-		spin_unlock_irq(&md->lock);
+		if (ret) {
+			/*
+			 * In case of a none complete request
+			 * prepare it again and resend.
+			 */
+			mmc_blk_rw_rq_prep(mq_rq, card, disable_multi, mq);
+			mmc_start_req(card->host, &mq_rq->mmc_active, NULL);
+		}
 	} while (ret);
 
 	return 1;
@@ -927,15 +1156,22 @@
 		}
 	} else {
 		spin_lock_irq(&md->lock);
-		ret = __blk_end_request(req, 0, brq.data.bytes_xfered);
+		ret = __blk_end_request(req, 0, brq->data.bytes_xfered);
 		spin_unlock_irq(&md->lock);
 	}
 
+ cmd_abort:
 	spin_lock_irq(&md->lock);
 	while (ret)
 		ret = __blk_end_request(req, -EIO, blk_rq_cur_bytes(req));
 	spin_unlock_irq(&md->lock);
 
+ start_new_req:
+	if (rqc) {
+		mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
+		mmc_start_req(card->host, &mq->mqrq_cur->mmc_active, NULL);
+	}
+
 	return 0;
 }
 
@@ -945,26 +1181,37 @@
 	struct mmc_blk_data *md = mq->data;
 	struct mmc_card *card = md->queue.card;
 
-	mmc_claim_host(card->host);
+	if (req && !mq->mqrq_prev->req)
+		/* claim host only for the first request */
+		mmc_claim_host(card->host);
+
 	ret = mmc_blk_part_switch(card, md);
 	if (ret) {
 		ret = 0;
 		goto out;
 	}
 
-	if (req->cmd_flags & REQ_DISCARD) {
+	if (req && req->cmd_flags & REQ_DISCARD) {
+		/* complete ongoing async transfer before issuing discard */
+		if (card->host->areq)
+			mmc_blk_issue_rw_rq(mq, NULL);
 		if (req->cmd_flags & REQ_SECURE)
 			ret = mmc_blk_issue_secdiscard_rq(mq, req);
 		else
 			ret = mmc_blk_issue_discard_rq(mq, req);
-	} else if (req->cmd_flags & REQ_FLUSH) {
+	} else if (req && req->cmd_flags & REQ_FLUSH) {
+		/* complete ongoing async transfer before issuing flush */
+		if (card->host->areq)
+			mmc_blk_issue_rw_rq(mq, NULL);
 		ret = mmc_blk_issue_flush(mq, req);
 	} else {
 		ret = mmc_blk_issue_rw_rq(mq, req);
 	}
 
 out:
-	mmc_release_host(card->host);
+	if (!req)
+		/* release host only when there are no more requests */
+		mmc_release_host(card->host);
 	return ret;
 }
 
diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c
index 233cdfa..006a5e9 100644
--- a/drivers/mmc/card/mmc_test.c
+++ b/drivers/mmc/card/mmc_test.c
@@ -148,6 +148,27 @@
 	struct mmc_test_general_result	*gr;
 };
 
+enum mmc_test_prep_media {
+	MMC_TEST_PREP_NONE = 0,
+	MMC_TEST_PREP_WRITE_FULL = 1 << 0,
+	MMC_TEST_PREP_ERASE = 1 << 1,
+};
+
+struct mmc_test_multiple_rw {
+	unsigned int *sg_len;
+	unsigned int *bs;
+	unsigned int len;
+	unsigned int size;
+	bool do_write;
+	bool do_nonblock_req;
+	enum mmc_test_prep_media prepare;
+};
+
+struct mmc_test_async_req {
+	struct mmc_async_req areq;
+	struct mmc_test_card *test;
+};
+
 /*******************************************************************/
 /*  General helper functions                                       */
 /*******************************************************************/
@@ -367,21 +388,26 @@
  * Map memory into a scatterlist.  Optionally allow the same memory to be
  * mapped more than once.
  */
-static int mmc_test_map_sg(struct mmc_test_mem *mem, unsigned long sz,
+static int mmc_test_map_sg(struct mmc_test_mem *mem, unsigned long size,
 			   struct scatterlist *sglist, int repeat,
 			   unsigned int max_segs, unsigned int max_seg_sz,
-			   unsigned int *sg_len)
+			   unsigned int *sg_len, int min_sg_len)
 {
 	struct scatterlist *sg = NULL;
 	unsigned int i;
+	unsigned long sz = size;
 
 	sg_init_table(sglist, max_segs);
+	if (min_sg_len > max_segs)
+		min_sg_len = max_segs;
 
 	*sg_len = 0;
 	do {
 		for (i = 0; i < mem->cnt; i++) {
 			unsigned long len = PAGE_SIZE << mem->arr[i].order;
 
+			if (min_sg_len && (size / min_sg_len < len))
+				len = ALIGN(size / min_sg_len, 512);
 			if (len > sz)
 				len = sz;
 			if (len > max_seg_sz)
@@ -554,11 +580,12 @@
 
 	printk(KERN_INFO "%s: Transfer of %u x %u sectors (%u x %u%s KiB) took "
 			 "%lu.%09lu seconds (%u kB/s, %u KiB/s, "
-			 "%u.%02u IOPS)\n",
+			 "%u.%02u IOPS, sg_len %d)\n",
 			 mmc_hostname(test->card->host), count, sectors, count,
 			 sectors >> 1, (sectors & 1 ? ".5" : ""),
 			 (unsigned long)ts.tv_sec, (unsigned long)ts.tv_nsec,
-			 rate / 1000, rate / 1024, iops / 100, iops % 100);
+			 rate / 1000, rate / 1024, iops / 100, iops % 100,
+			 test->area.sg_len);
 
 	mmc_test_save_transfer_result(test, count, sectors, ts, rate, iops);
 }
@@ -661,7 +688,7 @@
  * Checks that a normal transfer didn't have any errors
  */
 static int mmc_test_check_result(struct mmc_test_card *test,
-	struct mmc_request *mrq)
+				 struct mmc_request *mrq)
 {
 	int ret;
 
@@ -685,6 +712,17 @@
 	return ret;
 }
 
+static int mmc_test_check_result_async(struct mmc_card *card,
+				       struct mmc_async_req *areq)
+{
+	struct mmc_test_async_req *test_async =
+		container_of(areq, struct mmc_test_async_req, areq);
+
+	mmc_test_wait_busy(test_async->test);
+
+	return mmc_test_check_result(test_async->test, areq->mrq);
+}
+
 /*
  * Checks that a "short transfer" behaved as expected
  */
@@ -720,6 +758,85 @@
 }
 
 /*
+ * Tests nonblock transfer with certain parameters
+ */
+static void mmc_test_nonblock_reset(struct mmc_request *mrq,
+				    struct mmc_command *cmd,
+				    struct mmc_command *stop,
+				    struct mmc_data *data)
+{
+	memset(mrq, 0, sizeof(struct mmc_request));
+	memset(cmd, 0, sizeof(struct mmc_command));
+	memset(data, 0, sizeof(struct mmc_data));
+	memset(stop, 0, sizeof(struct mmc_command));
+
+	mrq->cmd = cmd;
+	mrq->data = data;
+	mrq->stop = stop;
+}
+static int mmc_test_nonblock_transfer(struct mmc_test_card *test,
+				      struct scatterlist *sg, unsigned sg_len,
+				      unsigned dev_addr, unsigned blocks,
+				      unsigned blksz, int write, int count)
+{
+	struct mmc_request mrq1;
+	struct mmc_command cmd1;
+	struct mmc_command stop1;
+	struct mmc_data data1;
+
+	struct mmc_request mrq2;
+	struct mmc_command cmd2;
+	struct mmc_command stop2;
+	struct mmc_data data2;
+
+	struct mmc_test_async_req test_areq[2];
+	struct mmc_async_req *done_areq;
+	struct mmc_async_req *cur_areq = &test_areq[0].areq;
+	struct mmc_async_req *other_areq = &test_areq[1].areq;
+	int i;
+	int ret;
+
+	test_areq[0].test = test;
+	test_areq[1].test = test;
+
+	mmc_test_nonblock_reset(&mrq1, &cmd1, &stop1, &data1);
+	mmc_test_nonblock_reset(&mrq2, &cmd2, &stop2, &data2);
+
+	cur_areq->mrq = &mrq1;
+	cur_areq->err_check = mmc_test_check_result_async;
+	other_areq->mrq = &mrq2;
+	other_areq->err_check = mmc_test_check_result_async;
+
+	for (i = 0; i < count; i++) {
+		mmc_test_prepare_mrq(test, cur_areq->mrq, sg, sg_len, dev_addr,
+				     blocks, blksz, write);
+		done_areq = mmc_start_req(test->card->host, cur_areq, &ret);
+
+		if (ret || (!done_areq && i > 0))
+			goto err;
+
+		if (done_areq) {
+			if (done_areq->mrq == &mrq2)
+				mmc_test_nonblock_reset(&mrq2, &cmd2,
+							&stop2, &data2);
+			else
+				mmc_test_nonblock_reset(&mrq1, &cmd1,
+							&stop1, &data1);
+		}
+		done_areq = cur_areq;
+		cur_areq = other_areq;
+		other_areq = done_areq;
+		dev_addr += blocks;
+	}
+
+	done_areq = mmc_start_req(test->card->host, NULL, &ret);
+
+	return ret;
+err:
+	return ret;
+}
+
+/*
  * Tests a basic transfer with certain parameters
  */
 static int mmc_test_simple_transfer(struct mmc_test_card *test,
@@ -1302,7 +1419,7 @@
  * Map sz bytes so that it can be transferred.
  */
 static int mmc_test_area_map(struct mmc_test_card *test, unsigned long sz,
-			     int max_scatter)
+			     int max_scatter, int min_sg_len)
 {
 	struct mmc_test_area *t = &test->area;
 	int err;
@@ -1315,7 +1432,7 @@
 				       &t->sg_len);
 	} else {
 		err = mmc_test_map_sg(t->mem, sz, t->sg, 1, t->max_segs,
-				      t->max_seg_sz, &t->sg_len);
+				      t->max_seg_sz, &t->sg_len, min_sg_len);
 	}
 	if (err)
 		printk(KERN_INFO "%s: Failed to map sg list\n",
@@ -1336,14 +1453,17 @@
 }
 
 /*
- * Map and transfer bytes.
+ * Map and transfer bytes for multiple transfers.
  */
-static int mmc_test_area_io(struct mmc_test_card *test, unsigned long sz,
-			    unsigned int dev_addr, int write, int max_scatter,
-			    int timed)
+static int mmc_test_area_io_seq(struct mmc_test_card *test, unsigned long sz,
+				unsigned int dev_addr, int write,
+				int max_scatter, int timed, int count,
+				bool nonblock, int min_sg_len)
 {
 	struct timespec ts1, ts2;
-	int ret;
+	int ret = 0;
+	int i;
+	struct mmc_test_area *t = &test->area;
 
 	/*
 	 * In the case of a maximally scattered transfer, the maximum transfer
@@ -1361,14 +1481,21 @@
 			sz = max_tfr;
 	}
 
-	ret = mmc_test_area_map(test, sz, max_scatter);
+	ret = mmc_test_area_map(test, sz, max_scatter, min_sg_len);
 	if (ret)
 		return ret;
 
 	if (timed)
 		getnstimeofday(&ts1);
+	if (nonblock)
+		ret = mmc_test_nonblock_transfer(test, t->sg, t->sg_len,
+				 dev_addr, t->blocks, 512, write, count);
+	else
+		for (i = 0; i < count && ret == 0; i++) {
+			ret = mmc_test_area_transfer(test, dev_addr, write);
+			dev_addr += sz >> 9;
+		}
 
-	ret = mmc_test_area_transfer(test, dev_addr, write);
 	if (ret)
 		return ret;
 
@@ -1376,11 +1503,19 @@
 		getnstimeofday(&ts2);
 
 	if (timed)
-		mmc_test_print_rate(test, sz, &ts1, &ts2);
+		mmc_test_print_avg_rate(test, sz, count, &ts1, &ts2);
 
 	return 0;
 }
 
+static int mmc_test_area_io(struct mmc_test_card *test, unsigned long sz,
+			    unsigned int dev_addr, int write, int max_scatter,
+			    int timed)
+{
+	return mmc_test_area_io_seq(test, sz, dev_addr, write, max_scatter,
+				    timed, 1, false, 0);
+}
+
 /*
  * Write the test area entirely.
  */
@@ -1954,6 +2089,245 @@
 	return mmc_test_large_seq_perf(test, 1);
 }
 
+static int mmc_test_rw_multiple(struct mmc_test_card *test,
+				struct mmc_test_multiple_rw *tdata,
+				unsigned int reqsize, unsigned int size,
+				int min_sg_len)
+{
+	unsigned int dev_addr;
+	struct mmc_test_area *t = &test->area;
+	int ret = 0;
+
+	/* Set up test area */
+	if (size > mmc_test_capacity(test->card) / 2 * 512)
+		size = mmc_test_capacity(test->card) / 2 * 512;
+	if (reqsize > t->max_tfr)
+		reqsize = t->max_tfr;
+	dev_addr = mmc_test_capacity(test->card) / 4;
+	if ((dev_addr & 0xffff0000))
+		dev_addr &= 0xffff0000; /* Round to 64MiB boundary */
+	else
+		dev_addr &= 0xfffff800; /* Round to 1MiB boundary */
+	if (!dev_addr)
+		goto err;
+
+	if (reqsize > size)
+		return 0;
+
+	/* prepare test area */
+	if (mmc_can_erase(test->card) &&
+	    tdata->prepare & MMC_TEST_PREP_ERASE) {
+		ret = mmc_erase(test->card, dev_addr,
+				size / 512, MMC_SECURE_ERASE_ARG);
+		if (ret)
+			ret = mmc_erase(test->card, dev_addr,
+					size / 512, MMC_ERASE_ARG);
+		if (ret)
+			goto err;
+	}
+
+	/* Run test */
+	ret = mmc_test_area_io_seq(test, reqsize, dev_addr,
+				   tdata->do_write, 0, 1, size / reqsize,
+				   tdata->do_nonblock_req, min_sg_len);
+	if (ret)
+		goto err;
+
+	return ret;
+ err:
+	printk(KERN_INFO "[%s] error\n", __func__);
+	return ret;
+}
+
+static int mmc_test_rw_multiple_size(struct mmc_test_card *test,
+				     struct mmc_test_multiple_rw *rw)
+{
+	int ret = 0;
+	int i;
+	void *pre_req = test->card->host->ops->pre_req;
+	void *post_req = test->card->host->ops->post_req;
+
+	if (rw->do_nonblock_req &&
+	    ((!pre_req && post_req) || (pre_req && !post_req))) {
+		printk(KERN_INFO "error: only one of pre/post is defined\n");
+		return -EINVAL;
+	}
+
+	for (i = 0 ; i < rw->len && ret == 0; i++) {
+		ret = mmc_test_rw_multiple(test, rw, rw->bs[i], rw->size, 0);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+static int mmc_test_rw_multiple_sg_len(struct mmc_test_card *test,
+				       struct mmc_test_multiple_rw *rw)
+{
+	int ret = 0;
+	int i;
+
+	for (i = 0 ; i < rw->len && ret == 0; i++) {
+		ret = mmc_test_rw_multiple(test, rw, 512*1024, rw->size,
+					   rw->sg_len[i]);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+/*
+ * Multiple blocking write 4k to 4 MB chunks
+ */
+static int mmc_test_profile_mult_write_blocking_perf(struct mmc_test_card *test)
+{
+	unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16,
+			     1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22};
+	struct mmc_test_multiple_rw test_data = {
+		.bs = bs,
+		.size = TEST_AREA_MAX_SIZE,
+		.len = ARRAY_SIZE(bs),
+		.do_write = true,
+		.do_nonblock_req = false,
+		.prepare = MMC_TEST_PREP_ERASE,
+	};
+
+	return mmc_test_rw_multiple_size(test, &test_data);
+};
+
+/*
+ * Multiple non-blocking write 4k to 4 MB chunks
+ */
+static int mmc_test_profile_mult_write_nonblock_perf(struct mmc_test_card *test)
+{
+	unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16,
+			     1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22};
+	struct mmc_test_multiple_rw test_data = {
+		.bs = bs,
+		.size = TEST_AREA_MAX_SIZE,
+		.len = ARRAY_SIZE(bs),
+		.do_write = true,
+		.do_nonblock_req = true,
+		.prepare = MMC_TEST_PREP_ERASE,
+	};
+
+	return mmc_test_rw_multiple_size(test, &test_data);
+}
+
+/*
+ * Multiple blocking read 4k to 4 MB chunks
+ */
+static int mmc_test_profile_mult_read_blocking_perf(struct mmc_test_card *test)
+{
+	unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16,
+			     1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22};
+	struct mmc_test_multiple_rw test_data = {
+		.bs = bs,
+		.size = TEST_AREA_MAX_SIZE,
+		.len = ARRAY_SIZE(bs),
+		.do_write = false,
+		.do_nonblock_req = false,
+		.prepare = MMC_TEST_PREP_NONE,
+	};
+
+	return mmc_test_rw_multiple_size(test, &test_data);
+}
+
+/*
+ * Multiple non-blocking read 4k to 4 MB chunks
+ */
+static int mmc_test_profile_mult_read_nonblock_perf(struct mmc_test_card *test)
+{
+	unsigned int bs[] = {1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16,
+			     1 << 17, 1 << 18, 1 << 19, 1 << 20, 1 << 22};
+	struct mmc_test_multiple_rw test_data = {
+		.bs = bs,
+		.size = TEST_AREA_MAX_SIZE,
+		.len = ARRAY_SIZE(bs),
+		.do_write = false,
+		.do_nonblock_req = true,
+		.prepare = MMC_TEST_PREP_NONE,
+	};
+
+	return mmc_test_rw_multiple_size(test, &test_data);
+}
+
+/*
+ * Multiple blocking write 1 to 512 sg elements
+ */
+static int mmc_test_profile_sglen_wr_blocking_perf(struct mmc_test_card *test)
+{
+	unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
+				 1 << 7, 1 << 8, 1 << 9};
+	struct mmc_test_multiple_rw test_data = {
+		.sg_len = sg_len,
+		.size = TEST_AREA_MAX_SIZE,
+		.len = ARRAY_SIZE(sg_len),
+		.do_write = true,
+		.do_nonblock_req = false,
+		.prepare = MMC_TEST_PREP_ERASE,
+	};
+
+	return mmc_test_rw_multiple_sg_len(test, &test_data);
+};
+
+/*
+ * Multiple non-blocking write 1 to 512 sg elements
+ */
+static int mmc_test_profile_sglen_wr_nonblock_perf(struct mmc_test_card *test)
+{
+	unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
+				 1 << 7, 1 << 8, 1 << 9};
+	struct mmc_test_multiple_rw test_data = {
+		.sg_len = sg_len,
+		.size = TEST_AREA_MAX_SIZE,
+		.len = ARRAY_SIZE(sg_len),
+		.do_write = true,
+		.do_nonblock_req = true,
+		.prepare = MMC_TEST_PREP_ERASE,
+	};
+
+	return mmc_test_rw_multiple_sg_len(test, &test_data);
+}
+
+/*
+ * Multiple blocking read 1 to 512 sg elements
+ */
+static int mmc_test_profile_sglen_r_blocking_perf(struct mmc_test_card *test)
+{
+	unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
+				 1 << 7, 1 << 8, 1 << 9};
+	struct mmc_test_multiple_rw test_data = {
+		.sg_len = sg_len,
+		.size = TEST_AREA_MAX_SIZE,
+		.len = ARRAY_SIZE(sg_len),
+		.do_write = false,
+		.do_nonblock_req = false,
+		.prepare = MMC_TEST_PREP_NONE,
+	};
+
+	return mmc_test_rw_multiple_sg_len(test, &test_data);
+}
+
+/*
+ * Multiple non-blocking read 1 to 512 sg elements
+ */
+static int mmc_test_profile_sglen_r_nonblock_perf(struct mmc_test_card *test)
+{
+	unsigned int sg_len[] = {1, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
+				 1 << 7, 1 << 8, 1 << 9};
+	struct mmc_test_multiple_rw test_data = {
+		.sg_len = sg_len,
+		.size = TEST_AREA_MAX_SIZE,
+		.len = ARRAY_SIZE(sg_len),
+		.do_write = false,
+		.do_nonblock_req = true,
+		.prepare = MMC_TEST_PREP_NONE,
+	};
+
+	return mmc_test_rw_multiple_sg_len(test, &test_data);
+}
+
 static const struct mmc_test_case mmc_test_cases[] = {
 	{
 		.name = "Basic write (no data verification)",
@@ -2221,6 +2595,61 @@
 		.cleanup = mmc_test_area_cleanup,
 	},
 
+	{
+		.name = "Write performance with blocking req 4k to 4MB",
+		.prepare = mmc_test_area_prepare,
+		.run = mmc_test_profile_mult_write_blocking_perf,
+		.cleanup = mmc_test_area_cleanup,
+	},
+
+	{
+		.name = "Write performance with non-blocking req 4k to 4MB",
+		.prepare = mmc_test_area_prepare,
+		.run = mmc_test_profile_mult_write_nonblock_perf,
+		.cleanup = mmc_test_area_cleanup,
+	},
+
+	{
+		.name = "Read performance with blocking req 4k to 4MB",
+		.prepare = mmc_test_area_prepare,
+		.run = mmc_test_profile_mult_read_blocking_perf,
+		.cleanup = mmc_test_area_cleanup,
+	},
+
+	{
+		.name = "Read performance with non-blocking req 4k to 4MB",
+		.prepare = mmc_test_area_prepare,
+		.run = mmc_test_profile_mult_read_nonblock_perf,
+		.cleanup = mmc_test_area_cleanup,
+	},
+
+	{
+		.name = "Write performance blocking req 1 to 512 sg elems",
+		.prepare = mmc_test_area_prepare,
+		.run = mmc_test_profile_sglen_wr_blocking_perf,
+		.cleanup = mmc_test_area_cleanup,
+	},
+
+	{
+		.name = "Write performance non-blocking req 1 to 512 sg elems",
+		.prepare = mmc_test_area_prepare,
+		.run = mmc_test_profile_sglen_wr_nonblock_perf,
+		.cleanup = mmc_test_area_cleanup,
+	},
+
+	{
+		.name = "Read performance blocking req 1 to 512 sg elems",
+		.prepare = mmc_test_area_prepare,
+		.run = mmc_test_profile_sglen_r_blocking_perf,
+		.cleanup = mmc_test_area_cleanup,
+	},
+
+	{
+		.name = "Read performance non-blocking req 1 to 512 sg elems",
+		.prepare = mmc_test_area_prepare,
+		.run = mmc_test_profile_sglen_r_nonblock_perf,
+		.cleanup = mmc_test_area_cleanup,
+	},
 };
 
 static DEFINE_MUTEX(mmc_test_lock);
@@ -2445,6 +2874,32 @@
 	.release	= single_release,
 };
 
+static int mtf_testlist_show(struct seq_file *sf, void *data)
+{
+	int i;
+
+	mutex_lock(&mmc_test_lock);
+
+	for (i = 0; i < ARRAY_SIZE(mmc_test_cases); i++)
+		seq_printf(sf, "%d:\t%s\n", i+1, mmc_test_cases[i].name);
+
+	mutex_unlock(&mmc_test_lock);
+
+	return 0;
+}
+
+static int mtf_testlist_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mtf_testlist_show, inode->i_private);
+}
+
+static const struct file_operations mmc_test_fops_testlist = {
+	.open		= mtf_testlist_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static void mmc_test_free_file_test(struct mmc_card *card)
 {
 	struct mmc_test_dbgfs_file *df, *dfs;
@@ -2476,7 +2931,18 @@
 
 	if (IS_ERR_OR_NULL(file)) {
 		dev_err(&card->dev,
-			"Can't create file. Perhaps debugfs is disabled.\n");
+			"Can't create test. Perhaps debugfs is disabled.\n");
+		ret = -ENODEV;
+		goto err;
+	}
+
+	if (card->debugfs_root)
+		file = debugfs_create_file("testlist", S_IRUGO,
+			card->debugfs_root, card, &mmc_test_fops_testlist);
+
+	if (IS_ERR_OR_NULL(file)) {
+		dev_err(&card->dev,
+			"Can't create testlist. Perhaps debugfs is disabled.\n");
 		ret = -ENODEV;
 		goto err;
 	}
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 6413afa..45fb362 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -52,14 +52,18 @@
 	down(&mq->thread_sem);
 	do {
 		struct request *req = NULL;
+		struct mmc_queue_req *tmp;
 
 		spin_lock_irq(q->queue_lock);
 		set_current_state(TASK_INTERRUPTIBLE);
 		req = blk_fetch_request(q);
-		mq->req = req;
+		mq->mqrq_cur->req = req;
 		spin_unlock_irq(q->queue_lock);
 
-		if (!req) {
+		if (req || mq->mqrq_prev->req) {
+			set_current_state(TASK_RUNNING);
+			mq->issue_fn(mq, req);
+		} else {
 			if (kthread_should_stop()) {
 				set_current_state(TASK_RUNNING);
 				break;
@@ -67,11 +71,14 @@
 			up(&mq->thread_sem);
 			schedule();
 			down(&mq->thread_sem);
-			continue;
 		}
-		set_current_state(TASK_RUNNING);
 
-		mq->issue_fn(mq, req);
+		/* Current request becomes previous request and vice versa. */
+		mq->mqrq_prev->brq.mrq.data = NULL;
+		mq->mqrq_prev->req = NULL;
+		tmp = mq->mqrq_prev;
+		mq->mqrq_prev = mq->mqrq_cur;
+		mq->mqrq_cur = tmp;
 	} while (1);
 	up(&mq->thread_sem);
 
@@ -97,10 +104,46 @@
 		return;
 	}
 
-	if (!mq->req)
+	if (!mq->mqrq_cur->req && !mq->mqrq_prev->req)
 		wake_up_process(mq->thread);
 }
 
+struct scatterlist *mmc_alloc_sg(int sg_len, int *err)
+{
+	struct scatterlist *sg;
+
+	sg = kmalloc(sizeof(struct scatterlist)*sg_len, GFP_KERNEL);
+	if (!sg)
+		*err = -ENOMEM;
+	else {
+		*err = 0;
+		sg_init_table(sg, sg_len);
+	}
+
+	return sg;
+}
+
+static void mmc_queue_setup_discard(struct request_queue *q,
+				    struct mmc_card *card)
+{
+	unsigned max_discard;
+
+	max_discard = mmc_calc_max_discard(card);
+	if (!max_discard)
+		return;
+
+	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+	q->limits.max_discard_sectors = max_discard;
+	if (card->erased_byte == 0)
+		q->limits.discard_zeroes_data = 1;
+	q->limits.discard_granularity = card->pref_erase << 9;
+	/* granularity must not be greater than max. discard */
+	if (card->pref_erase > max_discard)
+		q->limits.discard_granularity = 0;
+	if (mmc_can_secure_erase_trim(card))
+		queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
+}
+
 /**
  * mmc_init_queue - initialise a queue structure.
  * @mq: mmc queue
@@ -116,6 +159,8 @@
 	struct mmc_host *host = card->host;
 	u64 limit = BLK_BOUNCE_HIGH;
 	int ret;
+	struct mmc_queue_req *mqrq_cur = &mq->mqrq[0];
+	struct mmc_queue_req *mqrq_prev = &mq->mqrq[1];
 
 	if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
 		limit = *mmc_dev(host)->dma_mask;
@@ -125,21 +170,16 @@
 	if (!mq->queue)
 		return -ENOMEM;
 
+	memset(&mq->mqrq_cur, 0, sizeof(mq->mqrq_cur));
+	memset(&mq->mqrq_prev, 0, sizeof(mq->mqrq_prev));
+	mq->mqrq_cur = mqrq_cur;
+	mq->mqrq_prev = mqrq_prev;
 	mq->queue->queuedata = mq;
-	mq->req = NULL;
 
 	blk_queue_prep_rq(mq->queue, mmc_prep_request);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
-	if (mmc_can_erase(card)) {
-		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mq->queue);
-		mq->queue->limits.max_discard_sectors = UINT_MAX;
-		if (card->erased_byte == 0)
-			mq->queue->limits.discard_zeroes_data = 1;
-		mq->queue->limits.discard_granularity = card->pref_erase << 9;
-		if (mmc_can_secure_erase_trim(card))
-			queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD,
-						mq->queue);
-	}
+	if (mmc_can_erase(card))
+		mmc_queue_setup_discard(mq->queue, card);
 
 #ifdef CONFIG_MMC_BLOCK_BOUNCE
 	if (host->max_segs == 1) {
@@ -155,53 +195,64 @@
 			bouncesz = host->max_blk_count * 512;
 
 		if (bouncesz > 512) {
-			mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
-			if (!mq->bounce_buf) {
+			mqrq_cur->bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
+			if (!mqrq_cur->bounce_buf) {
 				printk(KERN_WARNING "%s: unable to "
-					"allocate bounce buffer\n",
+					"allocate bounce cur buffer\n",
 					mmc_card_name(card));
 			}
+			mqrq_prev->bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
+			if (!mqrq_prev->bounce_buf) {
+				printk(KERN_WARNING "%s: unable to "
+					"allocate bounce prev buffer\n",
+					mmc_card_name(card));
+				kfree(mqrq_cur->bounce_buf);
+				mqrq_cur->bounce_buf = NULL;
+			}
 		}
 
-		if (mq->bounce_buf) {
+		if (mqrq_cur->bounce_buf && mqrq_prev->bounce_buf) {
 			blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
 			blk_queue_max_hw_sectors(mq->queue, bouncesz / 512);
 			blk_queue_max_segments(mq->queue, bouncesz / 512);
 			blk_queue_max_segment_size(mq->queue, bouncesz);
 
-			mq->sg = kmalloc(sizeof(struct scatterlist),
-				GFP_KERNEL);
-			if (!mq->sg) {
-				ret = -ENOMEM;
+			mqrq_cur->sg = mmc_alloc_sg(1, &ret);
+			if (ret)
 				goto cleanup_queue;
-			}
-			sg_init_table(mq->sg, 1);
 
-			mq->bounce_sg = kmalloc(sizeof(struct scatterlist) *
-				bouncesz / 512, GFP_KERNEL);
-			if (!mq->bounce_sg) {
-				ret = -ENOMEM;
+			mqrq_cur->bounce_sg =
+				mmc_alloc_sg(bouncesz / 512, &ret);
+			if (ret)
 				goto cleanup_queue;
-			}
-			sg_init_table(mq->bounce_sg, bouncesz / 512);
+
+			mqrq_prev->sg = mmc_alloc_sg(1, &ret);
+			if (ret)
+				goto cleanup_queue;
+
+			mqrq_prev->bounce_sg =
+				mmc_alloc_sg(bouncesz / 512, &ret);
+			if (ret)
+				goto cleanup_queue;
 		}
 	}
 #endif
 
-	if (!mq->bounce_buf) {
+	if (!mqrq_cur->bounce_buf && !mqrq_prev->bounce_buf) {
 		blk_queue_bounce_limit(mq->queue, limit);
 		blk_queue_max_hw_sectors(mq->queue,
 			min(host->max_blk_count, host->max_req_size / 512));
 		blk_queue_max_segments(mq->queue, host->max_segs);
 		blk_queue_max_segment_size(mq->queue, host->max_seg_size);
 
-		mq->sg = kmalloc(sizeof(struct scatterlist) *
-			host->max_segs, GFP_KERNEL);
-		if (!mq->sg) {
-			ret = -ENOMEM;
+		mqrq_cur->sg = mmc_alloc_sg(host->max_segs, &ret);
+		if (ret)
 			goto cleanup_queue;
-		}
-		sg_init_table(mq->sg, host->max_segs);
+
+
+		mqrq_prev->sg = mmc_alloc_sg(host->max_segs, &ret);
+		if (ret)
+			goto cleanup_queue;
 	}
 
 	sema_init(&mq->thread_sem, 1);
@@ -216,16 +267,22 @@
 
 	return 0;
  free_bounce_sg:
- 	if (mq->bounce_sg)
- 		kfree(mq->bounce_sg);
- 	mq->bounce_sg = NULL;
+	kfree(mqrq_cur->bounce_sg);
+	mqrq_cur->bounce_sg = NULL;
+	kfree(mqrq_prev->bounce_sg);
+	mqrq_prev->bounce_sg = NULL;
+
  cleanup_queue:
- 	if (mq->sg)
-		kfree(mq->sg);
-	mq->sg = NULL;
-	if (mq->bounce_buf)
-		kfree(mq->bounce_buf);
-	mq->bounce_buf = NULL;
+	kfree(mqrq_cur->sg);
+	mqrq_cur->sg = NULL;
+	kfree(mqrq_cur->bounce_buf);
+	mqrq_cur->bounce_buf = NULL;
+
+	kfree(mqrq_prev->sg);
+	mqrq_prev->sg = NULL;
+	kfree(mqrq_prev->bounce_buf);
+	mqrq_prev->bounce_buf = NULL;
+
 	blk_cleanup_queue(mq->queue);
 	return ret;
 }
@@ -234,6 +291,8 @@
 {
 	struct request_queue *q = mq->queue;
 	unsigned long flags;
+	struct mmc_queue_req *mqrq_cur = mq->mqrq_cur;
+	struct mmc_queue_req *mqrq_prev = mq->mqrq_prev;
 
 	/* Make sure the queue isn't suspended, as that will deadlock */
 	mmc_queue_resume(mq);
@@ -247,16 +306,23 @@
 	blk_start_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
- 	if (mq->bounce_sg)
- 		kfree(mq->bounce_sg);
- 	mq->bounce_sg = NULL;
+	kfree(mqrq_cur->bounce_sg);
+	mqrq_cur->bounce_sg = NULL;
 
-	kfree(mq->sg);
-	mq->sg = NULL;
+	kfree(mqrq_cur->sg);
+	mqrq_cur->sg = NULL;
 
-	if (mq->bounce_buf)
-		kfree(mq->bounce_buf);
-	mq->bounce_buf = NULL;
+	kfree(mqrq_cur->bounce_buf);
+	mqrq_cur->bounce_buf = NULL;
+
+	kfree(mqrq_prev->bounce_sg);
+	mqrq_prev->bounce_sg = NULL;
+
+	kfree(mqrq_prev->sg);
+	mqrq_prev->sg = NULL;
+
+	kfree(mqrq_prev->bounce_buf);
+	mqrq_prev->bounce_buf = NULL;
 
 	mq->card = NULL;
 }
@@ -309,27 +375,27 @@
 /*
  * Prepare the sg list(s) to be handed of to the host driver
  */
-unsigned int mmc_queue_map_sg(struct mmc_queue *mq)
+unsigned int mmc_queue_map_sg(struct mmc_queue *mq, struct mmc_queue_req *mqrq)
 {
 	unsigned int sg_len;
 	size_t buflen;
 	struct scatterlist *sg;
 	int i;
 
-	if (!mq->bounce_buf)
-		return blk_rq_map_sg(mq->queue, mq->req, mq->sg);
+	if (!mqrq->bounce_buf)
+		return blk_rq_map_sg(mq->queue, mqrq->req, mqrq->sg);
 
-	BUG_ON(!mq->bounce_sg);
+	BUG_ON(!mqrq->bounce_sg);
 
-	sg_len = blk_rq_map_sg(mq->queue, mq->req, mq->bounce_sg);
+	sg_len = blk_rq_map_sg(mq->queue, mqrq->req, mqrq->bounce_sg);
 
-	mq->bounce_sg_len = sg_len;
+	mqrq->bounce_sg_len = sg_len;
 
 	buflen = 0;
-	for_each_sg(mq->bounce_sg, sg, sg_len, i)
+	for_each_sg(mqrq->bounce_sg, sg, sg_len, i)
 		buflen += sg->length;
 
-	sg_init_one(mq->sg, mq->bounce_buf, buflen);
+	sg_init_one(mqrq->sg, mqrq->bounce_buf, buflen);
 
 	return 1;
 }
@@ -338,31 +404,30 @@
  * If writing, bounce the data to the buffer before the request
  * is sent to the host driver
  */
-void mmc_queue_bounce_pre(struct mmc_queue *mq)
+void mmc_queue_bounce_pre(struct mmc_queue_req *mqrq)
 {
-	if (!mq->bounce_buf)
+	if (!mqrq->bounce_buf)
 		return;
 
-	if (rq_data_dir(mq->req) != WRITE)
+	if (rq_data_dir(mqrq->req) != WRITE)
 		return;
 
-	sg_copy_to_buffer(mq->bounce_sg, mq->bounce_sg_len,
-		mq->bounce_buf, mq->sg[0].length);
+	sg_copy_to_buffer(mqrq->bounce_sg, mqrq->bounce_sg_len,
+		mqrq->bounce_buf, mqrq->sg[0].length);
 }
 
 /*
  * If reading, bounce the data from the buffer after the request
  * has been handled by the host driver
  */
-void mmc_queue_bounce_post(struct mmc_queue *mq)
+void mmc_queue_bounce_post(struct mmc_queue_req *mqrq)
 {
-	if (!mq->bounce_buf)
+	if (!mqrq->bounce_buf)
 		return;
 
-	if (rq_data_dir(mq->req) != READ)
+	if (rq_data_dir(mqrq->req) != READ)
 		return;
 
-	sg_copy_from_buffer(mq->bounce_sg, mq->bounce_sg_len,
-		mq->bounce_buf, mq->sg[0].length);
+	sg_copy_from_buffer(mqrq->bounce_sg, mqrq->bounce_sg_len,
+		mqrq->bounce_buf, mqrq->sg[0].length);
 }
-
diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
index 6223ef8..d2a1eb4 100644
--- a/drivers/mmc/card/queue.h
+++ b/drivers/mmc/card/queue.h
@@ -4,19 +4,35 @@
 struct request;
 struct task_struct;
 
+struct mmc_blk_request {
+	struct mmc_request	mrq;
+	struct mmc_command	sbc;
+	struct mmc_command	cmd;
+	struct mmc_command	stop;
+	struct mmc_data		data;
+};
+
+struct mmc_queue_req {
+	struct request		*req;
+	struct mmc_blk_request	brq;
+	struct scatterlist	*sg;
+	char			*bounce_buf;
+	struct scatterlist	*bounce_sg;
+	unsigned int		bounce_sg_len;
+	struct mmc_async_req	mmc_active;
+};
+
 struct mmc_queue {
 	struct mmc_card		*card;
 	struct task_struct	*thread;
 	struct semaphore	thread_sem;
 	unsigned int		flags;
-	struct request		*req;
 	int			(*issue_fn)(struct mmc_queue *, struct request *);
 	void			*data;
 	struct request_queue	*queue;
-	struct scatterlist	*sg;
-	char			*bounce_buf;
-	struct scatterlist	*bounce_sg;
-	unsigned int		bounce_sg_len;
+	struct mmc_queue_req	mqrq[2];
+	struct mmc_queue_req	*mqrq_cur;
+	struct mmc_queue_req	*mqrq_prev;
 };
 
 extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
@@ -25,8 +41,9 @@
 extern void mmc_queue_suspend(struct mmc_queue *);
 extern void mmc_queue_resume(struct mmc_queue *);
 
-extern unsigned int mmc_queue_map_sg(struct mmc_queue *);
-extern void mmc_queue_bounce_pre(struct mmc_queue *);
-extern void mmc_queue_bounce_post(struct mmc_queue *);
+extern unsigned int mmc_queue_map_sg(struct mmc_queue *,
+				     struct mmc_queue_req *);
+extern void mmc_queue_bounce_pre(struct mmc_queue_req *);
+extern void mmc_queue_bounce_post(struct mmc_queue_req *);
 
 #endif
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 7843efe..f091b43 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -198,9 +198,109 @@
 
 static void mmc_wait_done(struct mmc_request *mrq)
 {
-	complete(mrq->done_data);
+	complete(&mrq->completion);
 }
 
+static void __mmc_start_req(struct mmc_host *host, struct mmc_request *mrq)
+{
+	init_completion(&mrq->completion);
+	mrq->done = mmc_wait_done;
+	mmc_start_request(host, mrq);
+}
+
+static void mmc_wait_for_req_done(struct mmc_host *host,
+				  struct mmc_request *mrq)
+{
+	wait_for_completion(&mrq->completion);
+}
+
+/**
+ *	mmc_pre_req - Prepare for a new request
+ *	@host: MMC host to prepare command
+ *	@mrq: MMC request to prepare for
+ *	@is_first_req: true if there is no previous started request
+ *                     that may run in parellel to this call, otherwise false
+ *
+ *	mmc_pre_req() is called in prior to mmc_start_req() to let
+ *	host prepare for the new request. Preparation of a request may be
+ *	performed while another request is running on the host.
+ */
+static void mmc_pre_req(struct mmc_host *host, struct mmc_request *mrq,
+		 bool is_first_req)
+{
+	if (host->ops->pre_req)
+		host->ops->pre_req(host, mrq, is_first_req);
+}
+
+/**
+ *	mmc_post_req - Post process a completed request
+ *	@host: MMC host to post process command
+ *	@mrq: MMC request to post process for
+ *	@err: Error, if non zero, clean up any resources made in pre_req
+ *
+ *	Let the host post process a completed request. Post processing of
+ *	a request may be performed while another reuqest is running.
+ */
+static void mmc_post_req(struct mmc_host *host, struct mmc_request *mrq,
+			 int err)
+{
+	if (host->ops->post_req)
+		host->ops->post_req(host, mrq, err);
+}
+
+/**
+ *	mmc_start_req - start a non-blocking request
+ *	@host: MMC host to start command
+ *	@areq: async request to start
+ *	@error: out parameter returns 0 for success, otherwise non zero
+ *
+ *	Start a new MMC custom command request for a host.
+ *	If there is on ongoing async request wait for completion
+ *	of that request and start the new one and return.
+ *	Does not wait for the new request to complete.
+ *
+ *      Returns the completed request, NULL in case of none completed.
+ *	Wait for the an ongoing request (previoulsy started) to complete and
+ *	return the completed request. If there is no ongoing request, NULL
+ *	is returned without waiting. NULL is not an error condition.
+ */
+struct mmc_async_req *mmc_start_req(struct mmc_host *host,
+				    struct mmc_async_req *areq, int *error)
+{
+	int err = 0;
+	struct mmc_async_req *data = host->areq;
+
+	/* Prepare a new request */
+	if (areq)
+		mmc_pre_req(host, areq->mrq, !host->areq);
+
+	if (host->areq) {
+		mmc_wait_for_req_done(host, host->areq->mrq);
+		err = host->areq->err_check(host->card, host->areq);
+		if (err) {
+			mmc_post_req(host, host->areq->mrq, 0);
+			if (areq)
+				mmc_post_req(host, areq->mrq, -EINVAL);
+
+			host->areq = NULL;
+			goto out;
+		}
+	}
+
+	if (areq)
+		__mmc_start_req(host, areq->mrq);
+
+	if (host->areq)
+		mmc_post_req(host, host->areq->mrq, 0);
+
+	host->areq = areq;
+ out:
+	if (error)
+		*error = err;
+	return data;
+}
+EXPORT_SYMBOL(mmc_start_req);
+
 /**
  *	mmc_wait_for_req - start a request and wait for completion
  *	@host: MMC host to start command
@@ -212,16 +312,9 @@
  */
 void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq)
 {
-	DECLARE_COMPLETION_ONSTACK(complete);
-
-	mrq->done_data = &complete;
-	mrq->done = mmc_wait_done;
-
-	mmc_start_request(host, mrq);
-
-	wait_for_completion(&complete);
+	__mmc_start_req(host, mrq);
+	mmc_wait_for_req_done(host, mrq);
 }
-
 EXPORT_SYMBOL(mmc_wait_for_req);
 
 /**
@@ -1516,6 +1609,82 @@
 }
 EXPORT_SYMBOL(mmc_erase_group_aligned);
 
+static unsigned int mmc_do_calc_max_discard(struct mmc_card *card,
+					    unsigned int arg)
+{
+	struct mmc_host *host = card->host;
+	unsigned int max_discard, x, y, qty = 0, max_qty, timeout;
+	unsigned int last_timeout = 0;
+
+	if (card->erase_shift)
+		max_qty = UINT_MAX >> card->erase_shift;
+	else if (mmc_card_sd(card))
+		max_qty = UINT_MAX;
+	else
+		max_qty = UINT_MAX / card->erase_size;
+
+	/* Find the largest qty with an OK timeout */
+	do {
+		y = 0;
+		for (x = 1; x && x <= max_qty && max_qty - x >= qty; x <<= 1) {
+			timeout = mmc_erase_timeout(card, arg, qty + x);
+			if (timeout > host->max_discard_to)
+				break;
+			if (timeout < last_timeout)
+				break;
+			last_timeout = timeout;
+			y = x;
+		}
+		qty += y;
+	} while (y);
+
+	if (!qty)
+		return 0;
+
+	if (qty == 1)
+		return 1;
+
+	/* Convert qty to sectors */
+	if (card->erase_shift)
+		max_discard = --qty << card->erase_shift;
+	else if (mmc_card_sd(card))
+		max_discard = qty;
+	else
+		max_discard = --qty * card->erase_size;
+
+	return max_discard;
+}
+
+unsigned int mmc_calc_max_discard(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	unsigned int max_discard, max_trim;
+
+	if (!host->max_discard_to)
+		return UINT_MAX;
+
+	/*
+	 * Without erase_group_def set, MMC erase timeout depends on clock
+	 * frequence which can change.  In that case, the best choice is
+	 * just the preferred erase size.
+	 */
+	if (mmc_card_mmc(card) && !(card->ext_csd.erase_group_def & 1))
+		return card->pref_erase;
+
+	max_discard = mmc_do_calc_max_discard(card, MMC_ERASE_ARG);
+	if (mmc_can_trim(card)) {
+		max_trim = mmc_do_calc_max_discard(card, MMC_TRIM_ARG);
+		if (max_trim < max_discard)
+			max_discard = max_trim;
+	} else if (max_discard < card->erase_size) {
+		max_discard = 0;
+	}
+	pr_debug("%s: calculated max. discard sectors %u for timeout %u ms\n",
+		 mmc_hostname(host), max_discard, host->max_discard_to);
+	return max_discard;
+}
+EXPORT_SYMBOL(mmc_calc_max_discard);
+
 int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen)
 {
 	struct mmc_command cmd = {0};
@@ -1663,6 +1832,10 @@
 {
 	int ret = 0;
 
+#ifdef CONFIG_MMC_DEBUG
+	pr_info("%s: %s: powering down\n", mmc_hostname(host), __func__);
+#endif
+
 	mmc_bus_get(host);
 
 	if (!host->bus_ops || host->bus_dead || !host->bus_ops->power_restore) {
@@ -1685,6 +1858,10 @@
 {
 	int ret;
 
+#ifdef CONFIG_MMC_DEBUG
+	pr_info("%s: %s: powering up\n", mmc_hostname(host), __func__);
+#endif
+
 	mmc_bus_get(host);
 
 	if (!host->bus_ops || host->bus_dead || !host->bus_ops->power_restore) {
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index ff27741..633975f 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -409,52 +409,62 @@
 
 static int sd_select_driver_type(struct mmc_card *card, u8 *status)
 {
-	int host_drv_type = 0, card_drv_type = 0;
+	int host_drv_type = SD_DRIVER_TYPE_B;
+	int card_drv_type = SD_DRIVER_TYPE_B;
+	int drive_strength;
 	int err;
 
 	/*
 	 * If the host doesn't support any of the Driver Types A,C or D,
-	 * default Driver Type B is used.
+	 * or there is no board specific handler then default Driver
+	 * Type B is used.
 	 */
 	if (!(card->host->caps & (MMC_CAP_DRIVER_TYPE_A | MMC_CAP_DRIVER_TYPE_C
 	    | MMC_CAP_DRIVER_TYPE_D)))
 		return 0;
 
-	if (card->host->caps & MMC_CAP_DRIVER_TYPE_A) {
-		host_drv_type = MMC_SET_DRIVER_TYPE_A;
-		if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A)
-			card_drv_type = MMC_SET_DRIVER_TYPE_A;
-		else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B)
-			card_drv_type = MMC_SET_DRIVER_TYPE_B;
-		else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
-			card_drv_type = MMC_SET_DRIVER_TYPE_C;
-	} else if (card->host->caps & MMC_CAP_DRIVER_TYPE_C) {
-		host_drv_type = MMC_SET_DRIVER_TYPE_C;
-		if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
-			card_drv_type = MMC_SET_DRIVER_TYPE_C;
-	} else if (!(card->host->caps & MMC_CAP_DRIVER_TYPE_D)) {
-		/*
-		 * If we are here, that means only the default driver type
-		 * B is supported by the host.
-		 */
-		host_drv_type = MMC_SET_DRIVER_TYPE_B;
-		if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_B)
-			card_drv_type = MMC_SET_DRIVER_TYPE_B;
-		else if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
-			card_drv_type = MMC_SET_DRIVER_TYPE_C;
-	}
+	if (!card->host->ops->select_drive_strength)
+		return 0;
 
-	err = mmc_sd_switch(card, 1, 2, card_drv_type, status);
+	if (card->host->caps & MMC_CAP_DRIVER_TYPE_A)
+		host_drv_type |= SD_DRIVER_TYPE_A;
+
+	if (card->host->caps & MMC_CAP_DRIVER_TYPE_C)
+		host_drv_type |= SD_DRIVER_TYPE_C;
+
+	if (card->host->caps & MMC_CAP_DRIVER_TYPE_D)
+		host_drv_type |= SD_DRIVER_TYPE_D;
+
+	if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_A)
+		card_drv_type |= SD_DRIVER_TYPE_A;
+
+	if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_C)
+		card_drv_type |= SD_DRIVER_TYPE_C;
+
+	if (card->sw_caps.sd3_drv_type & SD_DRIVER_TYPE_D)
+		card_drv_type |= SD_DRIVER_TYPE_D;
+
+	/*
+	 * The drive strength that the hardware can support
+	 * depends on the board design.  Pass the appropriate
+	 * information and let the hardware specific code
+	 * return what is possible given the options
+	 */
+	drive_strength = card->host->ops->select_drive_strength(
+		card->sw_caps.uhs_max_dtr,
+		host_drv_type, card_drv_type);
+
+	err = mmc_sd_switch(card, 1, 2, drive_strength, status);
 	if (err)
 		return err;
 
-	if ((status[15] & 0xF) != card_drv_type) {
-		printk(KERN_WARNING "%s: Problem setting driver strength!\n",
+	if ((status[15] & 0xF) != drive_strength) {
+		printk(KERN_WARNING "%s: Problem setting drive strength!\n",
 			mmc_hostname(card->host));
 		return 0;
 	}
 
-	mmc_set_driver_type(card->host, host_drv_type);
+	mmc_set_driver_type(card->host, drive_strength);
 
 	return 0;
 }
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index d2565df..e4e6822 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -167,11 +167,8 @@
 	int ret = 0;
 
 	/* Make sure card is powered before invoking ->remove() */
-	if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD) {
-		ret = pm_runtime_get_sync(dev);
-		if (ret < 0)
-			goto out;
-	}
+	if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD)
+		pm_runtime_get_sync(dev);
 
 	drv->remove(func);
 
@@ -191,7 +188,6 @@
 	if (func->card->host->caps & MMC_CAP_POWER_OFF_CARD)
 		pm_runtime_put_sync(dev);
 
-out:
 	return ret;
 }
 
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 56dbf3f..8c87096 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -81,28 +81,32 @@
 
 	  If unsure, say Y.
 
-config MMC_SDHCI_OF
-	tristate "SDHCI support on OpenFirmware platforms"
-	depends on MMC_SDHCI && OF
+config MMC_SDHCI_PLTFM
+	tristate "SDHCI platform and OF driver helper"
+	depends on MMC_SDHCI
 	help
-	  This selects the OF support for Secure Digital Host Controller
-	  Interfaces.
+	  This selects the common helper functions support for Secure Digital
+	  Host Controller Interface based platform and OF drivers.
+
+	  If you have a controller with this interface, say Y or M here.
 
 	  If unsure, say N.
 
 config MMC_SDHCI_OF_ESDHC
-	bool "SDHCI OF support for the Freescale eSDHC controller"
-	depends on MMC_SDHCI_OF
+	tristate "SDHCI OF support for the Freescale eSDHC controller"
+	depends on MMC_SDHCI_PLTFM
 	depends on PPC_OF
 	select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
 	help
 	  This selects the Freescale eSDHC controller support.
 
+	  If you have a controller with this interface, say Y or M here.
+
 	  If unsure, say N.
 
 config MMC_SDHCI_OF_HLWD
-	bool "SDHCI OF support for the Nintendo Wii SDHCI controllers"
-	depends on MMC_SDHCI_OF
+	tristate "SDHCI OF support for the Nintendo Wii SDHCI controllers"
+	depends on MMC_SDHCI_PLTFM
 	depends on PPC_OF
 	select MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
 	help
@@ -110,40 +114,36 @@
 	  found in the "Hollywood" chipset of the Nintendo Wii video game
 	  console.
 
-	  If unsure, say N.
-
-config MMC_SDHCI_PLTFM
-	tristate "SDHCI support on the platform specific bus"
-	depends on MMC_SDHCI
-	help
-	  This selects the platform specific bus support for Secure Digital Host
-	  Controller Interface.
-
 	  If you have a controller with this interface, say Y or M here.
 
 	  If unsure, say N.
 
 config MMC_SDHCI_CNS3XXX
-	bool "SDHCI support on the Cavium Networks CNS3xxx SoC"
+	tristate "SDHCI support on the Cavium Networks CNS3xxx SoC"
 	depends on ARCH_CNS3XXX
 	depends on MMC_SDHCI_PLTFM
 	help
 	  This selects the SDHCI support for CNS3xxx System-on-Chip devices.
 
+	  If you have a controller with this interface, say Y or M here.
+
 	  If unsure, say N.
 
 config MMC_SDHCI_ESDHC_IMX
-	bool "SDHCI platform support for the Freescale eSDHC i.MX controller"
-	depends on MMC_SDHCI_PLTFM && (ARCH_MX25 || ARCH_MX35 || ARCH_MX5)
+	tristate "SDHCI platform support for the Freescale eSDHC i.MX controller"
+	depends on ARCH_MX25 || ARCH_MX35 || ARCH_MX5
+	depends on MMC_SDHCI_PLTFM
 	select MMC_SDHCI_IO_ACCESSORS
 	help
 	  This selects the Freescale eSDHC controller support on the platform
 	  bus, found on platforms like mx35/51.
 
+	  If you have a controller with this interface, say Y or M here.
+
 	  If unsure, say N.
 
 config MMC_SDHCI_DOVE
-	bool "SDHCI support on Marvell's Dove SoC"
+	tristate "SDHCI support on Marvell's Dove SoC"
 	depends on ARCH_DOVE
 	depends on MMC_SDHCI_PLTFM
 	select MMC_SDHCI_IO_ACCESSORS
@@ -151,11 +151,14 @@
 	  This selects the Secure Digital Host Controller Interface in
 	  Marvell's Dove SoC.
 
+	  If you have a controller with this interface, say Y or M here.
+
 	  If unsure, say N.
 
 config MMC_SDHCI_TEGRA
-	bool "SDHCI platform support for the Tegra SD/MMC Controller"
-	depends on MMC_SDHCI_PLTFM && ARCH_TEGRA
+	tristate "SDHCI platform support for the Tegra SD/MMC Controller"
+	depends on ARCH_TEGRA
+	depends on MMC_SDHCI_PLTFM
 	select MMC_SDHCI_IO_ACCESSORS
 	help
 	  This selects the Tegra SD/MMC controller. If you have a Tegra
@@ -178,14 +181,28 @@
 
 	  If unsure, say N.
 
-config MMC_SDHCI_PXA
-	tristate "Marvell PXA168/PXA910/MMP2 SD Host Controller support"
-	depends on ARCH_PXA || ARCH_MMP
+config MMC_SDHCI_PXAV3
+	tristate "Marvell MMP2 SD Host Controller support (PXAV3)"
+	depends on CLKDEV_LOOKUP
 	select MMC_SDHCI
-	select MMC_SDHCI_IO_ACCESSORS
+	select MMC_SDHCI_PLTFM
+	default CPU_MMP2
 	help
-	  This selects the Marvell(R) PXA168/PXA910/MMP2 SD Host Controller.
-	  If you have a PXA168/PXA910/MMP2 platform with SD Host Controller
+	  This selects the Marvell(R) PXAV3 SD Host Controller.
+	  If you have a MMP2 platform with SD Host Controller
+	  and a card slot, say Y or M here.
+
+	  If unsure, say N.
+
+config MMC_SDHCI_PXAV2
+	tristate "Marvell PXA9XX SD Host Controller support (PXAV2)"
+	depends on CLKDEV_LOOKUP
+	select MMC_SDHCI
+	select MMC_SDHCI_PLTFM
+	default CPU_PXA910
+	help
+	  This selects the Marvell(R) PXAV2 SD Host Controller.
+	  If you have a PXA9XX platform with SD Host Controller
 	  and a card slot, say Y or M here.
 
 	  If unsure, say N.
@@ -281,13 +298,12 @@
 endchoice
 
 config MMC_ATMELMCI_DMA
-	bool "Atmel MCI DMA support (EXPERIMENTAL)"
-	depends on MMC_ATMELMCI && (AVR32 || ARCH_AT91SAM9G45) && DMA_ENGINE && EXPERIMENTAL
+	bool "Atmel MCI DMA support"
+	depends on MMC_ATMELMCI && (AVR32 || ARCH_AT91SAM9G45) && DMA_ENGINE
 	help
 	  Say Y here to have the Atmel MCI driver use a DMA engine to
 	  do data transfers and thus increase the throughput and
-	  reduce the CPU utilization. Note that this is highly
-	  experimental and may cause the driver to lock up.
+	  reduce the CPU utilization.
 
 	  If unsure, say N.
 
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 58a5cf7..b4b83f3 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -9,7 +9,8 @@
 obj-$(CONFIG_MMC_MXS)		+= mxs-mmc.o
 obj-$(CONFIG_MMC_SDHCI)		+= sdhci.o
 obj-$(CONFIG_MMC_SDHCI_PCI)	+= sdhci-pci.o
-obj-$(CONFIG_MMC_SDHCI_PXA)	+= sdhci-pxa.o
+obj-$(CONFIG_MMC_SDHCI_PXAV3)	+= sdhci-pxav3.o
+obj-$(CONFIG_MMC_SDHCI_PXAV2)	+= sdhci-pxav2.o
 obj-$(CONFIG_MMC_SDHCI_S3C)	+= sdhci-s3c.o
 obj-$(CONFIG_MMC_SDHCI_SPEAR)	+= sdhci-spear.o
 obj-$(CONFIG_MMC_WBSD)		+= wbsd.o
@@ -31,9 +32,7 @@
 obj-$(CONFIG_MMC_TMIO)		+= tmio_mmc.o
 obj-$(CONFIG_MMC_TMIO_CORE)	+= tmio_mmc_core.o
 tmio_mmc_core-y			:= tmio_mmc_pio.o
-ifneq ($(CONFIG_MMC_SDHI),n)
-tmio_mmc_core-y			+= tmio_mmc_dma.o
-endif
+tmio_mmc_core-$(subst m,y,$(CONFIG_MMC_SDHI))	+= tmio_mmc_dma.o
 obj-$(CONFIG_MMC_SDHI)		+= sh_mobile_sdhi.o
 obj-$(CONFIG_MMC_CB710)		+= cb710-mmc.o
 obj-$(CONFIG_MMC_VIA_SDMMC)	+= via-sdmmc.o
@@ -44,17 +43,13 @@
 obj-$(CONFIG_MMC_VUB300)	+= vub300.o
 obj-$(CONFIG_MMC_USHC)		+= ushc.o
 
-obj-$(CONFIG_MMC_SDHCI_PLTFM)			+= sdhci-platform.o
-sdhci-platform-y				:= sdhci-pltfm.o
-sdhci-platform-$(CONFIG_MMC_SDHCI_CNS3XXX)	+= sdhci-cns3xxx.o
-sdhci-platform-$(CONFIG_MMC_SDHCI_ESDHC_IMX)	+= sdhci-esdhc-imx.o
-sdhci-platform-$(CONFIG_MMC_SDHCI_DOVE)		+= sdhci-dove.o
-sdhci-platform-$(CONFIG_MMC_SDHCI_TEGRA)	+= sdhci-tegra.o
-
-obj-$(CONFIG_MMC_SDHCI_OF)	+= sdhci-of.o
-sdhci-of-y				:= sdhci-of-core.o
-sdhci-of-$(CONFIG_MMC_SDHCI_OF_ESDHC)	+= sdhci-of-esdhc.o
-sdhci-of-$(CONFIG_MMC_SDHCI_OF_HLWD)	+= sdhci-of-hlwd.o
+obj-$(CONFIG_MMC_SDHCI_PLTFM)		+= sdhci-pltfm.o
+obj-$(CONFIG_MMC_SDHCI_CNS3XXX)		+= sdhci-cns3xxx.o
+obj-$(CONFIG_MMC_SDHCI_ESDHC_IMX)	+= sdhci-esdhc-imx.o
+obj-$(CONFIG_MMC_SDHCI_DOVE)		+= sdhci-dove.o
+obj-$(CONFIG_MMC_SDHCI_TEGRA)		+= sdhci-tegra.o
+obj-$(CONFIG_MMC_SDHCI_OF_ESDHC)	+= sdhci-of-esdhc.o
+obj-$(CONFIG_MMC_SDHCI_OF_HLWD)		+= sdhci-of-hlwd.o
 
 ifeq ($(CONFIG_CB710_DEBUG),y)
 	CFLAGS-cb710-mmc	+= -DDEBUG
diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c
index d3e6a96..a4aa3af 100644
--- a/drivers/mmc/host/at91_mci.c
+++ b/drivers/mmc/host/at91_mci.c
@@ -77,7 +77,8 @@
 
 #include <mach/board.h>
 #include <mach/cpu.h>
-#include <mach/at91_mci.h>
+
+#include "at91_mci.h"
 
 #define DRIVER_NAME "at91_mci"
 
diff --git a/arch/arm/mach-at91/include/mach/at91_mci.h b/drivers/mmc/host/at91_mci.h
similarity index 98%
rename from arch/arm/mach-at91/include/mach/at91_mci.h
rename to drivers/mmc/host/at91_mci.h
index 02182c1..eec3a6b 100644
--- a/arch/arm/mach-at91/include/mach/at91_mci.h
+++ b/drivers/mmc/host/at91_mci.h
@@ -1,5 +1,5 @@
 /*
- * arch/arm/mach-at91/include/mach/at91_mci.h
+ * drivers/mmc/host/at91_mci.h
  *
  * Copyright (C) 2005 Ivan Kokshaysky
  * Copyright (C) SAN People
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index aa8039f..fa8cae1 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -203,6 +203,7 @@
 #define ATMCI_CARD_PRESENT	0
 #define ATMCI_CARD_NEED_INIT	1
 #define ATMCI_SHUTDOWN		2
+#define ATMCI_SUSPENDED		3
 
 	int			detect_pin;
 	int			wp_pin;
@@ -1878,10 +1879,72 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int atmci_suspend(struct device *dev)
+{
+	struct atmel_mci *host = dev_get_drvdata(dev);
+	int i;
+
+	 for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
+		struct atmel_mci_slot *slot = host->slot[i];
+		int ret;
+
+		if (!slot)
+			continue;
+		ret = mmc_suspend_host(slot->mmc);
+		if (ret < 0) {
+			while (--i >= 0) {
+				slot = host->slot[i];
+				if (slot
+				&& test_bit(ATMCI_SUSPENDED, &slot->flags)) {
+					mmc_resume_host(host->slot[i]->mmc);
+					clear_bit(ATMCI_SUSPENDED, &slot->flags);
+				}
+			}
+			return ret;
+		} else {
+			set_bit(ATMCI_SUSPENDED, &slot->flags);
+		}
+	}
+
+	return 0;
+}
+
+static int atmci_resume(struct device *dev)
+{
+	struct atmel_mci *host = dev_get_drvdata(dev);
+	int i;
+	int ret = 0;
+
+	for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
+		struct atmel_mci_slot *slot = host->slot[i];
+		int err;
+
+		slot = host->slot[i];
+		if (!slot)
+			continue;
+		if (!test_bit(ATMCI_SUSPENDED, &slot->flags))
+			continue;
+		err = mmc_resume_host(slot->mmc);
+		if (err < 0)
+			ret = err;
+		else
+			clear_bit(ATMCI_SUSPENDED, &slot->flags);
+	}
+
+	return ret;
+}
+static SIMPLE_DEV_PM_OPS(atmci_pm, atmci_suspend, atmci_resume);
+#define ATMCI_PM_OPS	(&atmci_pm)
+#else
+#define ATMCI_PM_OPS	NULL
+#endif
+
 static struct platform_driver atmci_driver = {
 	.remove		= __exit_p(atmci_remove),
 	.driver		= {
 		.name		= "atmel_mci",
+		.pm		= ATMCI_PM_OPS,
 	},
 };
 
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 66dcddb..0c839d3 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -33,6 +33,7 @@
 #include <linux/mmc/dw_mmc.h>
 #include <linux/bitops.h>
 #include <linux/regulator/consumer.h>
+#include <linux/workqueue.h>
 
 #include "dw_mmc.h"
 
@@ -100,6 +101,8 @@
 	int			last_detect_state;
 };
 
+static struct workqueue_struct *dw_mci_card_workqueue;
+
 #if defined(CONFIG_DEBUG_FS)
 static int dw_mci_req_show(struct seq_file *s, void *v)
 {
@@ -284,7 +287,7 @@
 /* DMA interface functions */
 static void dw_mci_stop_dma(struct dw_mci *host)
 {
-	if (host->use_dma) {
+	if (host->using_dma) {
 		host->dma_ops->stop(host);
 		host->dma_ops->cleanup(host);
 	} else {
@@ -432,6 +435,8 @@
 	unsigned int i, direction, sg_len;
 	u32 temp;
 
+	host->using_dma = 0;
+
 	/* If we don't have a channel, we can't do DMA */
 	if (!host->use_dma)
 		return -ENODEV;
@@ -451,6 +456,8 @@
 			return -EINVAL;
 	}
 
+	host->using_dma = 1;
+
 	if (data->flags & MMC_DATA_READ)
 		direction = DMA_FROM_DEVICE;
 	else
@@ -489,14 +496,18 @@
 	host->sg = NULL;
 	host->data = data;
 
+	if (data->flags & MMC_DATA_READ)
+		host->dir_status = DW_MCI_RECV_STATUS;
+	else
+		host->dir_status = DW_MCI_SEND_STATUS;
+
 	if (dw_mci_submit_data_dma(host, data)) {
 		host->sg = data->sg;
 		host->pio_offset = 0;
-		if (data->flags & MMC_DATA_READ)
-			host->dir_status = DW_MCI_RECV_STATUS;
-		else
-			host->dir_status = DW_MCI_SEND_STATUS;
+		host->part_buf_start = 0;
+		host->part_buf_count = 0;
 
+		mci_writel(host, RINTSTS, SDMMC_INT_TXDR | SDMMC_INT_RXDR);
 		temp = mci_readl(host, INTMASK);
 		temp |= SDMMC_INT_TXDR | SDMMC_INT_RXDR;
 		mci_writel(host, INTMASK, temp);
@@ -574,7 +585,7 @@
 	}
 
 	/* Set the current slot bus width */
-	mci_writel(host, CTYPE, slot->ctype);
+	mci_writel(host, CTYPE, (slot->ctype << slot->id));
 }
 
 static void dw_mci_start_request(struct dw_mci *host,
@@ -624,13 +635,13 @@
 		host->stop_cmdr = dw_mci_prepare_command(slot->mmc, mrq->stop);
 }
 
+/* must be called with host->lock held */
 static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot,
 				 struct mmc_request *mrq)
 {
 	dev_vdbg(&slot->mmc->class_dev, "queue request: state=%d\n",
 		 host->state);
 
-	spin_lock_bh(&host->lock);
 	slot->mrq = mrq;
 
 	if (host->state == STATE_IDLE) {
@@ -639,8 +650,6 @@
 	} else {
 		list_add_tail(&slot->queue_node, &host->queue);
 	}
-
-	spin_unlock_bh(&host->lock);
 }
 
 static void dw_mci_request(struct mmc_host *mmc, struct mmc_request *mrq)
@@ -650,14 +659,23 @@
 
 	WARN_ON(slot->mrq);
 
+	/*
+	 * The check for card presence and queueing of the request must be
+	 * atomic, otherwise the card could be removed in between and the
+	 * request wouldn't fail until another card was inserted.
+	 */
+	spin_lock_bh(&host->lock);
+
 	if (!test_bit(DW_MMC_CARD_PRESENT, &slot->flags)) {
+		spin_unlock_bh(&host->lock);
 		mrq->cmd->error = -ENOMEDIUM;
 		mmc_request_done(mmc, mrq);
 		return;
 	}
 
-	/* We don't support multiple blocks of weird lengths. */
 	dw_mci_queue_request(host, slot, mrq);
+
+	spin_unlock_bh(&host->lock);
 }
 
 static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
@@ -831,7 +849,7 @@
 	struct mmc_command *cmd;
 	enum dw_mci_state state;
 	enum dw_mci_state prev_state;
-	u32 status;
+	u32 status, ctrl;
 
 	spin_lock(&host->lock);
 
@@ -891,13 +909,19 @@
 
 			if (status & DW_MCI_DATA_ERROR_FLAGS) {
 				if (status & SDMMC_INT_DTO) {
-					dev_err(&host->pdev->dev,
-						"data timeout error\n");
 					data->error = -ETIMEDOUT;
 				} else if (status & SDMMC_INT_DCRC) {
-					dev_err(&host->pdev->dev,
-						"data CRC error\n");
 					data->error = -EILSEQ;
+				} else if (status & SDMMC_INT_EBE &&
+					   host->dir_status ==
+							DW_MCI_SEND_STATUS) {
+					/*
+					 * No data CRC status was returned.
+					 * The number of bytes transferred will
+					 * be exaggerated in PIO mode.
+					 */
+					data->bytes_xfered = 0;
+					data->error = -ETIMEDOUT;
 				} else {
 					dev_err(&host->pdev->dev,
 						"data FIFO error "
@@ -905,6 +929,16 @@
 						status);
 					data->error = -EIO;
 				}
+				/*
+				 * After an error, there may be data lingering
+				 * in the FIFO, so reset it - doing so
+				 * generates a block interrupt, hence setting
+				 * the scatter-gather pointer to NULL.
+				 */
+				host->sg = NULL;
+				ctrl = mci_readl(host, CTRL);
+				ctrl |= SDMMC_CTRL_FIFO_RESET;
+				mci_writel(host, CTRL, ctrl);
 			} else {
 				data->bytes_xfered = data->blocks * data->blksz;
 				data->error = 0;
@@ -946,84 +980,278 @@
 
 }
 
+/* push final bytes to part_buf, only use during push */
+static void dw_mci_set_part_bytes(struct dw_mci *host, void *buf, int cnt)
+{
+	memcpy((void *)&host->part_buf, buf, cnt);
+	host->part_buf_count = cnt;
+}
+
+/* append bytes to part_buf, only use during push */
+static int dw_mci_push_part_bytes(struct dw_mci *host, void *buf, int cnt)
+{
+	cnt = min(cnt, (1 << host->data_shift) - host->part_buf_count);
+	memcpy((void *)&host->part_buf + host->part_buf_count, buf, cnt);
+	host->part_buf_count += cnt;
+	return cnt;
+}
+
+/* pull first bytes from part_buf, only use during pull */
+static int dw_mci_pull_part_bytes(struct dw_mci *host, void *buf, int cnt)
+{
+	cnt = min(cnt, (int)host->part_buf_count);
+	if (cnt) {
+		memcpy(buf, (void *)&host->part_buf + host->part_buf_start,
+		       cnt);
+		host->part_buf_count -= cnt;
+		host->part_buf_start += cnt;
+	}
+	return cnt;
+}
+
+/* pull final bytes from the part_buf, assuming it's just been filled */
+static void dw_mci_pull_final_bytes(struct dw_mci *host, void *buf, int cnt)
+{
+	memcpy(buf, &host->part_buf, cnt);
+	host->part_buf_start = cnt;
+	host->part_buf_count = (1 << host->data_shift) - cnt;
+}
+
 static void dw_mci_push_data16(struct dw_mci *host, void *buf, int cnt)
 {
-	u16 *pdata = (u16 *)buf;
-
-	WARN_ON(cnt % 2 != 0);
-
-	cnt = cnt >> 1;
-	while (cnt > 0) {
-		mci_writew(host, DATA, *pdata++);
-		cnt--;
+	/* try and push anything in the part_buf */
+	if (unlikely(host->part_buf_count)) {
+		int len = dw_mci_push_part_bytes(host, buf, cnt);
+		buf += len;
+		cnt -= len;
+		if (!sg_next(host->sg) || host->part_buf_count == 2) {
+			mci_writew(host, DATA, host->part_buf16);
+			host->part_buf_count = 0;
+		}
+	}
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (unlikely((unsigned long)buf & 0x1)) {
+		while (cnt >= 2) {
+			u16 aligned_buf[64];
+			int len = min(cnt & -2, (int)sizeof(aligned_buf));
+			int items = len >> 1;
+			int i;
+			/* memcpy from input buffer into aligned buffer */
+			memcpy(aligned_buf, buf, len);
+			buf += len;
+			cnt -= len;
+			/* push data from aligned buffer into fifo */
+			for (i = 0; i < items; ++i)
+				mci_writew(host, DATA, aligned_buf[i]);
+		}
+	} else
+#endif
+	{
+		u16 *pdata = buf;
+		for (; cnt >= 2; cnt -= 2)
+			mci_writew(host, DATA, *pdata++);
+		buf = pdata;
+	}
+	/* put anything remaining in the part_buf */
+	if (cnt) {
+		dw_mci_set_part_bytes(host, buf, cnt);
+		if (!sg_next(host->sg))
+			mci_writew(host, DATA, host->part_buf16);
 	}
 }
 
 static void dw_mci_pull_data16(struct dw_mci *host, void *buf, int cnt)
 {
-	u16 *pdata = (u16 *)buf;
-
-	WARN_ON(cnt % 2 != 0);
-
-	cnt = cnt >> 1;
-	while (cnt > 0) {
-		*pdata++ = mci_readw(host, DATA);
-		cnt--;
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (unlikely((unsigned long)buf & 0x1)) {
+		while (cnt >= 2) {
+			/* pull data from fifo into aligned buffer */
+			u16 aligned_buf[64];
+			int len = min(cnt & -2, (int)sizeof(aligned_buf));
+			int items = len >> 1;
+			int i;
+			for (i = 0; i < items; ++i)
+				aligned_buf[i] = mci_readw(host, DATA);
+			/* memcpy from aligned buffer into output buffer */
+			memcpy(buf, aligned_buf, len);
+			buf += len;
+			cnt -= len;
+		}
+	} else
+#endif
+	{
+		u16 *pdata = buf;
+		for (; cnt >= 2; cnt -= 2)
+			*pdata++ = mci_readw(host, DATA);
+		buf = pdata;
+	}
+	if (cnt) {
+		host->part_buf16 = mci_readw(host, DATA);
+		dw_mci_pull_final_bytes(host, buf, cnt);
 	}
 }
 
 static void dw_mci_push_data32(struct dw_mci *host, void *buf, int cnt)
 {
-	u32 *pdata = (u32 *)buf;
-
-	WARN_ON(cnt % 4 != 0);
-	WARN_ON((unsigned long)pdata & 0x3);
-
-	cnt = cnt >> 2;
-	while (cnt > 0) {
-		mci_writel(host, DATA, *pdata++);
-		cnt--;
+	/* try and push anything in the part_buf */
+	if (unlikely(host->part_buf_count)) {
+		int len = dw_mci_push_part_bytes(host, buf, cnt);
+		buf += len;
+		cnt -= len;
+		if (!sg_next(host->sg) || host->part_buf_count == 4) {
+			mci_writel(host, DATA, host->part_buf32);
+			host->part_buf_count = 0;
+		}
+	}
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (unlikely((unsigned long)buf & 0x3)) {
+		while (cnt >= 4) {
+			u32 aligned_buf[32];
+			int len = min(cnt & -4, (int)sizeof(aligned_buf));
+			int items = len >> 2;
+			int i;
+			/* memcpy from input buffer into aligned buffer */
+			memcpy(aligned_buf, buf, len);
+			buf += len;
+			cnt -= len;
+			/* push data from aligned buffer into fifo */
+			for (i = 0; i < items; ++i)
+				mci_writel(host, DATA, aligned_buf[i]);
+		}
+	} else
+#endif
+	{
+		u32 *pdata = buf;
+		for (; cnt >= 4; cnt -= 4)
+			mci_writel(host, DATA, *pdata++);
+		buf = pdata;
+	}
+	/* put anything remaining in the part_buf */
+	if (cnt) {
+		dw_mci_set_part_bytes(host, buf, cnt);
+		if (!sg_next(host->sg))
+			mci_writel(host, DATA, host->part_buf32);
 	}
 }
 
 static void dw_mci_pull_data32(struct dw_mci *host, void *buf, int cnt)
 {
-	u32 *pdata = (u32 *)buf;
-
-	WARN_ON(cnt % 4 != 0);
-	WARN_ON((unsigned long)pdata & 0x3);
-
-	cnt = cnt >> 2;
-	while (cnt > 0) {
-		*pdata++ = mci_readl(host, DATA);
-		cnt--;
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (unlikely((unsigned long)buf & 0x3)) {
+		while (cnt >= 4) {
+			/* pull data from fifo into aligned buffer */
+			u32 aligned_buf[32];
+			int len = min(cnt & -4, (int)sizeof(aligned_buf));
+			int items = len >> 2;
+			int i;
+			for (i = 0; i < items; ++i)
+				aligned_buf[i] = mci_readl(host, DATA);
+			/* memcpy from aligned buffer into output buffer */
+			memcpy(buf, aligned_buf, len);
+			buf += len;
+			cnt -= len;
+		}
+	} else
+#endif
+	{
+		u32 *pdata = buf;
+		for (; cnt >= 4; cnt -= 4)
+			*pdata++ = mci_readl(host, DATA);
+		buf = pdata;
+	}
+	if (cnt) {
+		host->part_buf32 = mci_readl(host, DATA);
+		dw_mci_pull_final_bytes(host, buf, cnt);
 	}
 }
 
 static void dw_mci_push_data64(struct dw_mci *host, void *buf, int cnt)
 {
-	u64 *pdata = (u64 *)buf;
-
-	WARN_ON(cnt % 8 != 0);
-
-	cnt = cnt >> 3;
-	while (cnt > 0) {
-		mci_writeq(host, DATA, *pdata++);
-		cnt--;
+	/* try and push anything in the part_buf */
+	if (unlikely(host->part_buf_count)) {
+		int len = dw_mci_push_part_bytes(host, buf, cnt);
+		buf += len;
+		cnt -= len;
+		if (!sg_next(host->sg) || host->part_buf_count == 8) {
+			mci_writew(host, DATA, host->part_buf);
+			host->part_buf_count = 0;
+		}
+	}
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (unlikely((unsigned long)buf & 0x7)) {
+		while (cnt >= 8) {
+			u64 aligned_buf[16];
+			int len = min(cnt & -8, (int)sizeof(aligned_buf));
+			int items = len >> 3;
+			int i;
+			/* memcpy from input buffer into aligned buffer */
+			memcpy(aligned_buf, buf, len);
+			buf += len;
+			cnt -= len;
+			/* push data from aligned buffer into fifo */
+			for (i = 0; i < items; ++i)
+				mci_writeq(host, DATA, aligned_buf[i]);
+		}
+	} else
+#endif
+	{
+		u64 *pdata = buf;
+		for (; cnt >= 8; cnt -= 8)
+			mci_writeq(host, DATA, *pdata++);
+		buf = pdata;
+	}
+	/* put anything remaining in the part_buf */
+	if (cnt) {
+		dw_mci_set_part_bytes(host, buf, cnt);
+		if (!sg_next(host->sg))
+			mci_writeq(host, DATA, host->part_buf);
 	}
 }
 
 static void dw_mci_pull_data64(struct dw_mci *host, void *buf, int cnt)
 {
-	u64 *pdata = (u64 *)buf;
-
-	WARN_ON(cnt % 8 != 0);
-
-	cnt = cnt >> 3;
-	while (cnt > 0) {
-		*pdata++ = mci_readq(host, DATA);
-		cnt--;
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	if (unlikely((unsigned long)buf & 0x7)) {
+		while (cnt >= 8) {
+			/* pull data from fifo into aligned buffer */
+			u64 aligned_buf[16];
+			int len = min(cnt & -8, (int)sizeof(aligned_buf));
+			int items = len >> 3;
+			int i;
+			for (i = 0; i < items; ++i)
+				aligned_buf[i] = mci_readq(host, DATA);
+			/* memcpy from aligned buffer into output buffer */
+			memcpy(buf, aligned_buf, len);
+			buf += len;
+			cnt -= len;
+		}
+	} else
+#endif
+	{
+		u64 *pdata = buf;
+		for (; cnt >= 8; cnt -= 8)
+			*pdata++ = mci_readq(host, DATA);
+		buf = pdata;
 	}
+	if (cnt) {
+		host->part_buf = mci_readq(host, DATA);
+		dw_mci_pull_final_bytes(host, buf, cnt);
+	}
+}
+
+static void dw_mci_pull_data(struct dw_mci *host, void *buf, int cnt)
+{
+	int len;
+
+	/* get remaining partial bytes */
+	len = dw_mci_pull_part_bytes(host, buf, cnt);
+	if (unlikely(len == cnt))
+		return;
+	buf += len;
+	cnt -= len;
+
+	/* get the rest of the data */
+	host->pull_data(host, buf, cnt);
 }
 
 static void dw_mci_read_data_pio(struct dw_mci *host)
@@ -1037,9 +1265,10 @@
 	unsigned int nbytes = 0, len;
 
 	do {
-		len = SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift;
+		len = host->part_buf_count +
+			(SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift);
 		if (offset + len <= sg->length) {
-			host->pull_data(host, (void *)(buf + offset), len);
+			dw_mci_pull_data(host, (void *)(buf + offset), len);
 
 			offset += len;
 			nbytes += len;
@@ -1055,8 +1284,8 @@
 			}
 		} else {
 			unsigned int remaining = sg->length - offset;
-			host->pull_data(host, (void *)(buf + offset),
-					remaining);
+			dw_mci_pull_data(host, (void *)(buf + offset),
+					 remaining);
 			nbytes += remaining;
 
 			flush_dcache_page(sg_page(sg));
@@ -1066,7 +1295,7 @@
 
 			offset = len - remaining;
 			buf = sg_virt(sg);
-			host->pull_data(host, buf, offset);
+			dw_mci_pull_data(host, buf, offset);
 			nbytes += offset;
 		}
 
@@ -1083,7 +1312,6 @@
 			return;
 		}
 	} while (status & SDMMC_INT_RXDR); /*if the RXDR is ready read again*/
-	len = SDMMC_GET_FCNT(mci_readl(host, STATUS));
 	host->pio_offset = offset;
 	data->bytes_xfered += nbytes;
 	return;
@@ -1105,8 +1333,9 @@
 	unsigned int nbytes = 0, len;
 
 	do {
-		len = SDMMC_FIFO_SZ -
-			(SDMMC_GET_FCNT(mci_readl(host, STATUS)) << shift);
+		len = ((host->fifo_depth -
+			SDMMC_GET_FCNT(mci_readl(host, STATUS))) << shift)
+			- host->part_buf_count;
 		if (offset + len <= sg->length) {
 			host->push_data(host, (void *)(buf + offset), len);
 
@@ -1151,10 +1380,8 @@
 			return;
 		}
 	} while (status & SDMMC_INT_TXDR); /* if TXDR write again */
-
 	host->pio_offset = offset;
 	data->bytes_xfered += nbytes;
-
 	return;
 
 done:
@@ -1202,7 +1429,6 @@
 			host->cmd_status = status;
 			smp_wmb();
 			set_bit(EVENT_CMD_COMPLETE, &host->pending_events);
-			tasklet_schedule(&host->tasklet);
 		}
 
 		if (pending & DW_MCI_DATA_ERROR_FLAGS) {
@@ -1211,7 +1437,9 @@
 			host->data_status = status;
 			smp_wmb();
 			set_bit(EVENT_DATA_ERROR, &host->pending_events);
-			tasklet_schedule(&host->tasklet);
+			if (!(pending & (SDMMC_INT_DTO | SDMMC_INT_DCRC |
+					 SDMMC_INT_SBE | SDMMC_INT_EBE)))
+				tasklet_schedule(&host->tasklet);
 		}
 
 		if (pending & SDMMC_INT_DATA_OVER) {
@@ -1229,13 +1457,13 @@
 
 		if (pending & SDMMC_INT_RXDR) {
 			mci_writel(host, RINTSTS, SDMMC_INT_RXDR);
-			if (host->sg)
+			if (host->dir_status == DW_MCI_RECV_STATUS && host->sg)
 				dw_mci_read_data_pio(host);
 		}
 
 		if (pending & SDMMC_INT_TXDR) {
 			mci_writel(host, RINTSTS, SDMMC_INT_TXDR);
-			if (host->sg)
+			if (host->dir_status == DW_MCI_SEND_STATUS && host->sg)
 				dw_mci_write_data_pio(host);
 		}
 
@@ -1246,7 +1474,7 @@
 
 		if (pending & SDMMC_INT_CD) {
 			mci_writel(host, RINTSTS, SDMMC_INT_CD);
-			tasklet_schedule(&host->card_tasklet);
+			queue_work(dw_mci_card_workqueue, &host->card_work);
 		}
 
 	} while (pass_count++ < 5);
@@ -1265,9 +1493,9 @@
 	return IRQ_HANDLED;
 }
 
-static void dw_mci_tasklet_card(unsigned long data)
+static void dw_mci_work_routine_card(struct work_struct *work)
 {
-	struct dw_mci *host = (struct dw_mci *)data;
+	struct dw_mci *host = container_of(work, struct dw_mci, card_work);
 	int i;
 
 	for (i = 0; i < host->num_slots; i++) {
@@ -1279,22 +1507,21 @@
 
 		present = dw_mci_get_cd(mmc);
 		while (present != slot->last_detect_state) {
-			spin_lock(&host->lock);
-
 			dev_dbg(&slot->mmc->class_dev, "card %s\n",
 				present ? "inserted" : "removed");
 
+			/* Power up slot (before spin_lock, may sleep) */
+			if (present != 0 && host->pdata->setpower)
+				host->pdata->setpower(slot->id, mmc->ocr_avail);
+
+			spin_lock_bh(&host->lock);
+
 			/* Card change detected */
 			slot->last_detect_state = present;
 
-			/* Power up slot */
-			if (present != 0) {
-				if (host->pdata->setpower)
-					host->pdata->setpower(slot->id,
-							      mmc->ocr_avail);
-
+			/* Mark card as present if applicable */
+			if (present != 0)
 				set_bit(DW_MMC_CARD_PRESENT, &slot->flags);
-			}
 
 			/* Clean up queue if present */
 			mrq = slot->mrq;
@@ -1344,8 +1571,6 @@
 
 			/* Power down slot */
 			if (present == 0) {
-				if (host->pdata->setpower)
-					host->pdata->setpower(slot->id, 0);
 				clear_bit(DW_MMC_CARD_PRESENT, &slot->flags);
 
 				/*
@@ -1367,7 +1592,12 @@
 
 			}
 
-			spin_unlock(&host->lock);
+			spin_unlock_bh(&host->lock);
+
+			/* Power down slot (after spin_unlock, may sleep) */
+			if (present == 0 && host->pdata->setpower)
+				host->pdata->setpower(slot->id, 0);
+
 			present = dw_mci_get_cd(mmc);
 		}
 
@@ -1467,7 +1697,7 @@
 	 * Card may have been plugged in prior to boot so we
 	 * need to run the detect tasklet
 	 */
-	tasklet_schedule(&host->card_tasklet);
+	queue_work(dw_mci_card_workqueue, &host->card_work);
 
 	return 0;
 }
@@ -1645,8 +1875,19 @@
 	 * FIFO threshold settings  RxMark  = fifo_size / 2 - 1,
 	 *                          Tx Mark = fifo_size / 2 DMA Size = 8
 	 */
-	fifo_size = mci_readl(host, FIFOTH);
-	fifo_size = (fifo_size >> 16) & 0x7ff;
+	if (!host->pdata->fifo_depth) {
+		/*
+		 * Power-on value of RX_WMark is FIFO_DEPTH-1, but this may
+		 * have been overwritten by the bootloader, just like we're
+		 * about to do, so if you know the value for your hardware, you
+		 * should put it in the platform data.
+		 */
+		fifo_size = mci_readl(host, FIFOTH);
+		fifo_size = 1 + ((fifo_size >> 16) & 0x7ff);
+	} else {
+		fifo_size = host->pdata->fifo_depth;
+	}
+	host->fifo_depth = fifo_size;
 	host->fifoth_val = ((0x2 << 28) | ((fifo_size/2 - 1) << 16) |
 			((fifo_size/2) << 0));
 	mci_writel(host, FIFOTH, host->fifoth_val);
@@ -1656,12 +1897,15 @@
 	mci_writel(host, CLKSRC, 0);
 
 	tasklet_init(&host->tasklet, dw_mci_tasklet_func, (unsigned long)host);
-	tasklet_init(&host->card_tasklet,
-		     dw_mci_tasklet_card, (unsigned long)host);
+	dw_mci_card_workqueue = alloc_workqueue("dw-mci-card",
+			WQ_MEM_RECLAIM | WQ_NON_REENTRANT, 1);
+	if (!dw_mci_card_workqueue)
+		goto err_dmaunmap;
+	INIT_WORK(&host->card_work, dw_mci_work_routine_card);
 
 	ret = request_irq(irq, dw_mci_interrupt, 0, "dw-mci", host);
 	if (ret)
-		goto err_dmaunmap;
+		goto err_workqueue;
 
 	platform_set_drvdata(pdev, host);
 
@@ -1690,7 +1934,9 @@
 	mci_writel(host, CTRL, SDMMC_CTRL_INT_ENABLE); /* Enable mci interrupt */
 
 	dev_info(&pdev->dev, "DW MMC controller at irq %d, "
-		 "%d bit host data width\n", irq, width);
+		 "%d bit host data width, "
+		 "%u deep fifo\n",
+		 irq, width, fifo_size);
 	if (host->quirks & DW_MCI_QUIRK_IDMAC_DTO)
 		dev_info(&pdev->dev, "Internal DMAC interrupt fix enabled.\n");
 
@@ -1705,6 +1951,9 @@
 	}
 	free_irq(irq, host);
 
+err_workqueue:
+	destroy_workqueue(dw_mci_card_workqueue);
+
 err_dmaunmap:
 	if (host->use_dma && host->dma_ops->exit)
 		host->dma_ops->exit(host);
@@ -1744,6 +1993,7 @@
 	mci_writel(host, CLKSRC, 0);
 
 	free_irq(platform_get_irq(pdev, 0), host);
+	destroy_workqueue(dw_mci_card_workqueue);
 	dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma);
 
 	if (host->use_dma && host->dma_ops->exit)
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index 23c662a..027d377 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -118,7 +118,6 @@
 #define SDMMC_CMD_INDX(n)		((n) & 0x1F)
 /* Status register defines */
 #define SDMMC_GET_FCNT(x)		(((x)>>17) & 0x1FF)
-#define SDMMC_FIFO_SZ			32
 /* Internal DMAC interrupt defines */
 #define SDMMC_IDMAC_INT_AI		BIT(9)
 #define SDMMC_IDMAC_INT_NI		BIT(8)
@@ -134,22 +133,22 @@
 
 /* Register access macros */
 #define mci_readl(dev, reg)			\
-	__raw_readl(dev->regs + SDMMC_##reg)
+	__raw_readl((dev)->regs + SDMMC_##reg)
 #define mci_writel(dev, reg, value)			\
-	__raw_writel((value), dev->regs + SDMMC_##reg)
+	__raw_writel((value), (dev)->regs + SDMMC_##reg)
 
 /* 16-bit FIFO access macros */
 #define mci_readw(dev, reg)			\
-	__raw_readw(dev->regs + SDMMC_##reg)
+	__raw_readw((dev)->regs + SDMMC_##reg)
 #define mci_writew(dev, reg, value)			\
-	__raw_writew((value), dev->regs + SDMMC_##reg)
+	__raw_writew((value), (dev)->regs + SDMMC_##reg)
 
 /* 64-bit FIFO access macros */
 #ifdef readq
 #define mci_readq(dev, reg)			\
-	__raw_readq(dev->regs + SDMMC_##reg)
+	__raw_readq((dev)->regs + SDMMC_##reg)
 #define mci_writeq(dev, reg, value)			\
-	__raw_writeq((value), dev->regs + SDMMC_##reg)
+	__raw_writeq((value), (dev)->regs + SDMMC_##reg)
 #else
 /*
  * Dummy readq implementation for architectures that don't define it.
@@ -160,9 +159,9 @@
  * rest of the code free from ifdefs.
  */
 #define mci_readq(dev, reg)			\
-	(*(volatile u64 __force *)(dev->regs + SDMMC_##reg))
+	(*(volatile u64 __force *)((dev)->regs + SDMMC_##reg))
 #define mci_writeq(dev, reg, value)			\
-	(*(volatile u64 __force *)(dev->regs + SDMMC_##reg) = value)
+	(*(volatile u64 __force *)((dev)->regs + SDMMC_##reg) = (value))
 #endif
 
 #endif /* _DW_MMC_H_ */
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index fe14072..fef7140 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -226,6 +226,9 @@
 		return;
 	}
 
+	/* initialize pre request cookie */
+	host->next_data.cookie = 1;
+
 	/* Try to acquire a generic DMA engine slave channel */
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
@@ -335,7 +338,8 @@
 		dir = DMA_FROM_DEVICE;
 	}
 
-	dma_unmap_sg(chan->device->dev, data->sg, data->sg_len, dir);
+	if (!data->host_cookie)
+		dma_unmap_sg(chan->device->dev, data->sg, data->sg_len, dir);
 
 	/*
 	 * Use of DMA with scatter-gather is impossible.
@@ -353,7 +357,8 @@
 	dmaengine_terminate_all(host->dma_current);
 }
 
-static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl)
+static int mmci_dma_prep_data(struct mmci_host *host, struct mmc_data *data,
+			      struct mmci_host_next *next)
 {
 	struct variant_data *variant = host->variant;
 	struct dma_slave_config conf = {
@@ -364,13 +369,20 @@
 		.src_maxburst = variant->fifohalfsize >> 2, /* # of words */
 		.dst_maxburst = variant->fifohalfsize >> 2, /* # of words */
 	};
-	struct mmc_data *data = host->data;
 	struct dma_chan *chan;
 	struct dma_device *device;
 	struct dma_async_tx_descriptor *desc;
 	int nr_sg;
 
-	host->dma_current = NULL;
+	/* Check if next job is already prepared */
+	if (data->host_cookie && !next &&
+	    host->dma_current && host->dma_desc_current)
+		return 0;
+
+	if (!next) {
+		host->dma_current = NULL;
+		host->dma_desc_current = NULL;
+	}
 
 	if (data->flags & MMC_DATA_READ) {
 		conf.direction = DMA_FROM_DEVICE;
@@ -385,7 +397,7 @@
 		return -EINVAL;
 
 	/* If less than or equal to the fifo size, don't bother with DMA */
-	if (host->size <= variant->fifosize)
+	if (data->blksz * data->blocks <= variant->fifosize)
 		return -EINVAL;
 
 	device = chan->device;
@@ -399,14 +411,38 @@
 	if (!desc)
 		goto unmap_exit;
 
-	/* Okay, go for it. */
-	host->dma_current = chan;
+	if (next) {
+		next->dma_chan = chan;
+		next->dma_desc = desc;
+	} else {
+		host->dma_current = chan;
+		host->dma_desc_current = desc;
+	}
 
+	return 0;
+
+ unmap_exit:
+	if (!next)
+		dmaengine_terminate_all(chan);
+	dma_unmap_sg(device->dev, data->sg, data->sg_len, conf.direction);
+	return -ENOMEM;
+}
+
+static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl)
+{
+	int ret;
+	struct mmc_data *data = host->data;
+
+	ret = mmci_dma_prep_data(host, host->data, NULL);
+	if (ret)
+		return ret;
+
+	/* Okay, go for it. */
 	dev_vdbg(mmc_dev(host->mmc),
 		 "Submit MMCI DMA job, sglen %d blksz %04x blks %04x flags %08x\n",
 		 data->sg_len, data->blksz, data->blocks, data->flags);
-	dmaengine_submit(desc);
-	dma_async_issue_pending(chan);
+	dmaengine_submit(host->dma_desc_current);
+	dma_async_issue_pending(host->dma_current);
 
 	datactrl |= MCI_DPSM_DMAENABLE;
 
@@ -421,14 +457,90 @@
 	writel(readl(host->base + MMCIMASK0) | MCI_DATAENDMASK,
 	       host->base + MMCIMASK0);
 	return 0;
-
-unmap_exit:
-	dmaengine_terminate_all(chan);
-	dma_unmap_sg(device->dev, data->sg, data->sg_len, conf.direction);
-	return -ENOMEM;
 }
+
+static void mmci_get_next_data(struct mmci_host *host, struct mmc_data *data)
+{
+	struct mmci_host_next *next = &host->next_data;
+
+	if (data->host_cookie && data->host_cookie != next->cookie) {
+		printk(KERN_WARNING "[%s] invalid cookie: data->host_cookie %d"
+		       " host->next_data.cookie %d\n",
+		       __func__, data->host_cookie, host->next_data.cookie);
+		data->host_cookie = 0;
+	}
+
+	if (!data->host_cookie)
+		return;
+
+	host->dma_desc_current = next->dma_desc;
+	host->dma_current = next->dma_chan;
+
+	next->dma_desc = NULL;
+	next->dma_chan = NULL;
+}
+
+static void mmci_pre_request(struct mmc_host *mmc, struct mmc_request *mrq,
+			     bool is_first_req)
+{
+	struct mmci_host *host = mmc_priv(mmc);
+	struct mmc_data *data = mrq->data;
+	struct mmci_host_next *nd = &host->next_data;
+
+	if (!data)
+		return;
+
+	if (data->host_cookie) {
+		data->host_cookie = 0;
+		return;
+	}
+
+	/* if config for dma */
+	if (((data->flags & MMC_DATA_WRITE) && host->dma_tx_channel) ||
+	    ((data->flags & MMC_DATA_READ) && host->dma_rx_channel)) {
+		if (mmci_dma_prep_data(host, data, nd))
+			data->host_cookie = 0;
+		else
+			data->host_cookie = ++nd->cookie < 0 ? 1 : nd->cookie;
+	}
+}
+
+static void mmci_post_request(struct mmc_host *mmc, struct mmc_request *mrq,
+			      int err)
+{
+	struct mmci_host *host = mmc_priv(mmc);
+	struct mmc_data *data = mrq->data;
+	struct dma_chan *chan;
+	enum dma_data_direction dir;
+
+	if (!data)
+		return;
+
+	if (data->flags & MMC_DATA_READ) {
+		dir = DMA_FROM_DEVICE;
+		chan = host->dma_rx_channel;
+	} else {
+		dir = DMA_TO_DEVICE;
+		chan = host->dma_tx_channel;
+	}
+
+
+	/* if config for dma */
+	if (chan) {
+		if (err)
+			dmaengine_terminate_all(chan);
+		if (err || data->host_cookie)
+			dma_unmap_sg(mmc_dev(host->mmc), data->sg,
+				     data->sg_len, dir);
+		mrq->data->host_cookie = 0;
+	}
+}
+
 #else
 /* Blank functions if the DMA engine is not available */
+static void mmci_get_next_data(struct mmci_host *host, struct mmc_data *data)
+{
+}
 static inline void mmci_dma_setup(struct mmci_host *host)
 {
 }
@@ -449,6 +561,10 @@
 {
 	return -ENOSYS;
 }
+
+#define mmci_pre_request NULL
+#define mmci_post_request NULL
+
 #endif
 
 static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
@@ -872,6 +988,9 @@
 
 	host->mrq = mrq;
 
+	if (mrq->data)
+		mmci_get_next_data(host, mrq->data);
+
 	if (mrq->data && mrq->data->flags & MMC_DATA_READ)
 		mmci_start_data(host, mrq->data);
 
@@ -986,6 +1105,8 @@
 
 static const struct mmc_host_ops mmci_ops = {
 	.request	= mmci_request,
+	.pre_req	= mmci_pre_request,
+	.post_req	= mmci_post_request,
 	.set_ios	= mmci_set_ios,
 	.get_ro		= mmci_get_ro,
 	.get_cd		= mmci_get_cd,
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index 2164e8c..79e4143 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -166,6 +166,12 @@
 struct variant_data;
 struct dma_chan;
 
+struct mmci_host_next {
+	struct dma_async_tx_descriptor	*dma_desc;
+	struct dma_chan			*dma_chan;
+	s32				cookie;
+};
+
 struct mmci_host {
 	phys_addr_t		phybase;
 	void __iomem		*base;
@@ -203,6 +209,8 @@
 	struct dma_chan		*dma_current;
 	struct dma_chan		*dma_rx_channel;
 	struct dma_chan		*dma_tx_channel;
+	struct dma_async_tx_descriptor	*dma_desc_current;
+	struct mmci_host_next	next_data;
 
 #define dma_inprogress(host)	((host)->dma_current)
 #else
diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index 99d39a6..d513d47 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -564,40 +564,38 @@
 
 static void mxs_mmc_set_clk_rate(struct mxs_mmc_host *host, unsigned int rate)
 {
-	unsigned int ssp_rate, bit_rate;
-	u32 div1, div2;
+	unsigned int ssp_clk, ssp_sck;
+	u32 clock_divide, clock_rate;
 	u32 val;
 
-	ssp_rate = clk_get_rate(host->clk);
+	ssp_clk = clk_get_rate(host->clk);
 
-	for (div1 = 2; div1 < 254; div1 += 2) {
-		div2 = ssp_rate / rate / div1;
-		if (div2 < 0x100)
+	for (clock_divide = 2; clock_divide <= 254; clock_divide += 2) {
+		clock_rate = DIV_ROUND_UP(ssp_clk, rate * clock_divide);
+		clock_rate = (clock_rate > 0) ? clock_rate - 1 : 0;
+		if (clock_rate <= 255)
 			break;
 	}
 
-	if (div1 >= 254) {
+	if (clock_divide > 254) {
 		dev_err(mmc_dev(host->mmc),
 			"%s: cannot set clock to %d\n", __func__, rate);
 		return;
 	}
 
-	if (div2 == 0)
-		bit_rate = ssp_rate / div1;
-	else
-		bit_rate = ssp_rate / div1 / div2;
+	ssp_sck = ssp_clk / clock_divide / (1 + clock_rate);
 
 	val = readl(host->base + HW_SSP_TIMING);
 	val &= ~(BM_SSP_TIMING_CLOCK_DIVIDE | BM_SSP_TIMING_CLOCK_RATE);
-	val |= BF_SSP(div1, TIMING_CLOCK_DIVIDE);
-	val |= BF_SSP(div2 - 1, TIMING_CLOCK_RATE);
+	val |= BF_SSP(clock_divide, TIMING_CLOCK_DIVIDE);
+	val |= BF_SSP(clock_rate, TIMING_CLOCK_RATE);
 	writel(val, host->base + HW_SSP_TIMING);
 
-	host->clk_rate = bit_rate;
+	host->clk_rate = ssp_sck;
 
 	dev_dbg(mmc_dev(host->mmc),
-		"%s: div1 %d, div2 %d, ssp %d, bit %d, rate %d\n",
-		__func__, div1, div2, ssp_rate, bit_rate, rate);
+		"%s: clock_divide %d, clock_rate %d, ssp_clk %d, rate_actual %d, rate_requested %d\n",
+		__func__, clock_divide, clock_rate, ssp_clk, ssp_sck, rate);
 }
 
 static void mxs_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index dedf3da..21e4a79 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -17,6 +17,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
@@ -33,6 +34,7 @@
 #include <linux/semaphore.h>
 #include <linux/gpio.h>
 #include <linux/regulator/consumer.h>
+#include <linux/pm_runtime.h>
 #include <plat/dma.h>
 #include <mach/hardware.h>
 #include <plat/board.h>
@@ -116,15 +118,13 @@
 #define OMAP_MMC4_DEVID		3
 #define OMAP_MMC5_DEVID		4
 
+#define MMC_AUTOSUSPEND_DELAY	100
 #define MMC_TIMEOUT_MS		20
 #define OMAP_MMC_MASTER_CLOCK	96000000
+#define OMAP_MMC_MIN_CLOCK	400000
+#define OMAP_MMC_MAX_CLOCK	52000000
 #define DRIVER_NAME		"omap_hsmmc"
 
-/* Timeouts for entering power saving states on inactivity, msec */
-#define OMAP_MMC_DISABLED_TIMEOUT	100
-#define OMAP_MMC_SLEEP_TIMEOUT		1000
-#define OMAP_MMC_OFF_TIMEOUT		8000
-
 /*
  * One controller can have multiple slots, like on some omap boards using
  * omap.c controller driver. Luckily this is not currently done on any known
@@ -141,6 +141,11 @@
 #define OMAP_HSMMC_WRITE(base, reg, val) \
 	__raw_writel((val), (base) + OMAP_HSMMC_##reg)
 
+struct omap_hsmmc_next {
+	unsigned int	dma_len;
+	s32		cookie;
+};
+
 struct omap_hsmmc_host {
 	struct	device		*dev;
 	struct	mmc_host	*mmc;
@@ -148,7 +153,6 @@
 	struct	mmc_command	*cmd;
 	struct	mmc_data	*data;
 	struct	clk		*fclk;
-	struct	clk		*iclk;
 	struct	clk		*dbclk;
 	/*
 	 * vcc == configured supply
@@ -184,6 +188,7 @@
 	int			reqs_blocked;
 	int			use_reg;
 	int			req_in_progress;
+	struct omap_hsmmc_next	next_data;
 
 	struct	omap_mmc_platform_data	*pdata;
 };
@@ -548,6 +553,15 @@
 }
 
 /*
+ * Start clock to the card
+ */
+static void omap_hsmmc_start_clock(struct omap_hsmmc_host *host)
+{
+	OMAP_HSMMC_WRITE(host->base, SYSCTL,
+		OMAP_HSMMC_READ(host->base, SYSCTL) | CEN);
+}
+
+/*
  * Stop clock to the card
  */
 static void omap_hsmmc_stop_clock(struct omap_hsmmc_host *host)
@@ -584,6 +598,81 @@
 	OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
 }
 
+/* Calculate divisor for the given clock frequency */
+static u16 calc_divisor(struct mmc_ios *ios)
+{
+	u16 dsor = 0;
+
+	if (ios->clock) {
+		dsor = DIV_ROUND_UP(OMAP_MMC_MASTER_CLOCK, ios->clock);
+		if (dsor > 250)
+			dsor = 250;
+	}
+
+	return dsor;
+}
+
+static void omap_hsmmc_set_clock(struct omap_hsmmc_host *host)
+{
+	struct mmc_ios *ios = &host->mmc->ios;
+	unsigned long regval;
+	unsigned long timeout;
+
+	dev_dbg(mmc_dev(host->mmc), "Set clock to %uHz\n", ios->clock);
+
+	omap_hsmmc_stop_clock(host);
+
+	regval = OMAP_HSMMC_READ(host->base, SYSCTL);
+	regval = regval & ~(CLKD_MASK | DTO_MASK);
+	regval = regval | (calc_divisor(ios) << 6) | (DTO << 16);
+	OMAP_HSMMC_WRITE(host->base, SYSCTL, regval);
+	OMAP_HSMMC_WRITE(host->base, SYSCTL,
+		OMAP_HSMMC_READ(host->base, SYSCTL) | ICE);
+
+	/* Wait till the ICS bit is set */
+	timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS);
+	while ((OMAP_HSMMC_READ(host->base, SYSCTL) & ICS) != ICS
+		&& time_before(jiffies, timeout))
+		cpu_relax();
+
+	omap_hsmmc_start_clock(host);
+}
+
+static void omap_hsmmc_set_bus_width(struct omap_hsmmc_host *host)
+{
+	struct mmc_ios *ios = &host->mmc->ios;
+	u32 con;
+
+	con = OMAP_HSMMC_READ(host->base, CON);
+	switch (ios->bus_width) {
+	case MMC_BUS_WIDTH_8:
+		OMAP_HSMMC_WRITE(host->base, CON, con | DW8);
+		break;
+	case MMC_BUS_WIDTH_4:
+		OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8);
+		OMAP_HSMMC_WRITE(host->base, HCTL,
+			OMAP_HSMMC_READ(host->base, HCTL) | FOUR_BIT);
+		break;
+	case MMC_BUS_WIDTH_1:
+		OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8);
+		OMAP_HSMMC_WRITE(host->base, HCTL,
+			OMAP_HSMMC_READ(host->base, HCTL) & ~FOUR_BIT);
+		break;
+	}
+}
+
+static void omap_hsmmc_set_bus_mode(struct omap_hsmmc_host *host)
+{
+	struct mmc_ios *ios = &host->mmc->ios;
+	u32 con;
+
+	con = OMAP_HSMMC_READ(host->base, CON);
+	if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN)
+		OMAP_HSMMC_WRITE(host->base, CON, con | OD);
+	else
+		OMAP_HSMMC_WRITE(host->base, CON, con & ~OD);
+}
+
 #ifdef CONFIG_PM
 
 /*
@@ -595,8 +684,7 @@
 	struct mmc_ios *ios = &host->mmc->ios;
 	struct omap_mmc_platform_data *pdata = host->pdata;
 	int context_loss = 0;
-	u32 hctl, capa, con;
-	u16 dsor = 0;
+	u32 hctl, capa;
 	unsigned long timeout;
 
 	if (pdata->get_context_loss_count) {
@@ -658,54 +746,12 @@
 	if (host->power_mode == MMC_POWER_OFF)
 		goto out;
 
-	con = OMAP_HSMMC_READ(host->base, CON);
-	switch (ios->bus_width) {
-	case MMC_BUS_WIDTH_8:
-		OMAP_HSMMC_WRITE(host->base, CON, con | DW8);
-		break;
-	case MMC_BUS_WIDTH_4:
-		OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8);
-		OMAP_HSMMC_WRITE(host->base, HCTL,
-			OMAP_HSMMC_READ(host->base, HCTL) | FOUR_BIT);
-		break;
-	case MMC_BUS_WIDTH_1:
-		OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8);
-		OMAP_HSMMC_WRITE(host->base, HCTL,
-			OMAP_HSMMC_READ(host->base, HCTL) & ~FOUR_BIT);
-		break;
-	}
+	omap_hsmmc_set_bus_width(host);
 
-	if (ios->clock) {
-		dsor = OMAP_MMC_MASTER_CLOCK / ios->clock;
-		if (dsor < 1)
-			dsor = 1;
+	omap_hsmmc_set_clock(host);
 
-		if (OMAP_MMC_MASTER_CLOCK / dsor > ios->clock)
-			dsor++;
+	omap_hsmmc_set_bus_mode(host);
 
-		if (dsor > 250)
-			dsor = 250;
-	}
-
-	OMAP_HSMMC_WRITE(host->base, SYSCTL,
-		OMAP_HSMMC_READ(host->base, SYSCTL) & ~CEN);
-	OMAP_HSMMC_WRITE(host->base, SYSCTL, (dsor << 6) | (DTO << 16));
-	OMAP_HSMMC_WRITE(host->base, SYSCTL,
-		OMAP_HSMMC_READ(host->base, SYSCTL) | ICE);
-
-	timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS);
-	while ((OMAP_HSMMC_READ(host->base, SYSCTL) & ICS) != ICS
-		&& time_before(jiffies, timeout))
-		;
-
-	OMAP_HSMMC_WRITE(host->base, SYSCTL,
-		OMAP_HSMMC_READ(host->base, SYSCTL) | CEN);
-
-	con = OMAP_HSMMC_READ(host->base, CON);
-	if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN)
-		OMAP_HSMMC_WRITE(host->base, CON, con | OD);
-	else
-		OMAP_HSMMC_WRITE(host->base, CON, con & ~OD);
 out:
 	host->context_loss = context_loss;
 
@@ -973,14 +1019,14 @@
  * Readable error output
  */
 #ifdef CONFIG_MMC_DEBUG
-static void omap_hsmmc_report_irq(struct omap_hsmmc_host *host, u32 status)
+static void omap_hsmmc_dbg_report_irq(struct omap_hsmmc_host *host, u32 status)
 {
 	/* --- means reserved bit without definition at documentation */
 	static const char *omap_hsmmc_status_bits[] = {
-		"CC", "TC", "BGE", "---", "BWR", "BRR", "---", "---", "CIRQ",
-		"OBI", "---", "---", "---", "---", "---", "ERRI", "CTO", "CCRC",
-		"CEB", "CIE", "DTO", "DCRC", "DEB", "---", "ACE", "---",
-		"---", "---", "---", "CERR", "CERR", "BADA", "---", "---", "---"
+		"CC"  , "TC"  , "BGE", "---", "BWR" , "BRR" , "---" , "---" ,
+		"CIRQ",	"OBI" , "---", "---", "---" , "---" , "---" , "ERRI",
+		"CTO" , "CCRC", "CEB", "CIE", "DTO" , "DCRC", "DEB" , "---" ,
+		"ACE" , "---" , "---", "---", "CERR", "BADA", "---" , "---"
 	};
 	char res[256];
 	char *buf = res;
@@ -997,6 +1043,11 @@
 
 	dev_dbg(mmc_dev(host->mmc), "%s\n", res);
 }
+#else
+static inline void omap_hsmmc_dbg_report_irq(struct omap_hsmmc_host *host,
+					     u32 status)
+{
+}
 #endif  /* CONFIG_MMC_DEBUG */
 
 /*
@@ -1055,9 +1106,7 @@
 	dev_dbg(mmc_dev(host->mmc), "IRQ Status is %x\n", status);
 
 	if (status & ERR) {
-#ifdef CONFIG_MMC_DEBUG
-		omap_hsmmc_report_irq(host, status);
-#endif
+		omap_hsmmc_dbg_report_irq(host, status);
 		if ((status & CMD_TIMEOUT) ||
 			(status & CMD_CRC)) {
 			if (host->cmd) {
@@ -1155,8 +1204,7 @@
 	int ret;
 
 	/* Disable the clocks */
-	clk_disable(host->fclk);
-	clk_disable(host->iclk);
+	pm_runtime_put_sync(host->dev);
 	if (host->got_dbclk)
 		clk_disable(host->dbclk);
 
@@ -1167,8 +1215,7 @@
 	if (!ret)
 		ret = mmc_slot(host).set_power(host->dev, host->slot_id, 1,
 					       vdd);
-	clk_enable(host->iclk);
-	clk_enable(host->fclk);
+	pm_runtime_get_sync(host->dev);
 	if (host->got_dbclk)
 		clk_enable(host->dbclk);
 
@@ -1322,7 +1369,7 @@
 static void omap_hsmmc_dma_cb(int lch, u16 ch_status, void *cb_data)
 {
 	struct omap_hsmmc_host *host = cb_data;
-	struct mmc_data *data = host->mrq->data;
+	struct mmc_data *data;
 	int dma_ch, req_in_progress;
 
 	if (!(ch_status & OMAP_DMA_BLOCK_IRQ)) {
@@ -1337,6 +1384,7 @@
 		return;
 	}
 
+	data = host->mrq->data;
 	host->dma_sg_idx++;
 	if (host->dma_sg_idx < host->dma_len) {
 		/* Fire up the next transfer. */
@@ -1346,8 +1394,9 @@
 		return;
 	}
 
-	dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
-		omap_hsmmc_get_dma_dir(host, data));
+	if (!data->host_cookie)
+		dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+			     omap_hsmmc_get_dma_dir(host, data));
 
 	req_in_progress = host->req_in_progress;
 	dma_ch = host->dma_ch;
@@ -1365,6 +1414,45 @@
 	}
 }
 
+static int omap_hsmmc_pre_dma_transfer(struct omap_hsmmc_host *host,
+				       struct mmc_data *data,
+				       struct omap_hsmmc_next *next)
+{
+	int dma_len;
+
+	if (!next && data->host_cookie &&
+	    data->host_cookie != host->next_data.cookie) {
+		printk(KERN_WARNING "[%s] invalid cookie: data->host_cookie %d"
+		       " host->next_data.cookie %d\n",
+		       __func__, data->host_cookie, host->next_data.cookie);
+		data->host_cookie = 0;
+	}
+
+	/* Check if next job is already prepared */
+	if (next ||
+	    (!next && data->host_cookie != host->next_data.cookie)) {
+		dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg,
+				     data->sg_len,
+				     omap_hsmmc_get_dma_dir(host, data));
+
+	} else {
+		dma_len = host->next_data.dma_len;
+		host->next_data.dma_len = 0;
+	}
+
+
+	if (dma_len == 0)
+		return -EINVAL;
+
+	if (next) {
+		next->dma_len = dma_len;
+		data->host_cookie = ++next->cookie < 0 ? 1 : next->cookie;
+	} else
+		host->dma_len = dma_len;
+
+	return 0;
+}
+
 /*
  * Routine to configure and start DMA for the MMC card
  */
@@ -1398,9 +1486,10 @@
 			mmc_hostname(host->mmc), ret);
 		return ret;
 	}
+	ret = omap_hsmmc_pre_dma_transfer(host, data, NULL);
+	if (ret)
+		return ret;
 
-	host->dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg,
-			data->sg_len, omap_hsmmc_get_dma_dir(host, data));
 	host->dma_ch = dma_ch;
 	host->dma_sg_idx = 0;
 
@@ -1480,6 +1569,35 @@
 	return 0;
 }
 
+static void omap_hsmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
+				int err)
+{
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
+	struct mmc_data *data = mrq->data;
+
+	if (host->use_dma) {
+		dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+			     omap_hsmmc_get_dma_dir(host, data));
+		data->host_cookie = 0;
+	}
+}
+
+static void omap_hsmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
+			       bool is_first_req)
+{
+	struct omap_hsmmc_host *host = mmc_priv(mmc);
+
+	if (mrq->data->host_cookie) {
+		mrq->data->host_cookie = 0;
+		return ;
+	}
+
+	if (host->use_dma)
+		if (omap_hsmmc_pre_dma_transfer(host, mrq->data,
+						&host->next_data))
+			mrq->data->host_cookie = 0;
+}
+
 /*
  * Request function. for read/write operation
  */
@@ -1528,13 +1646,9 @@
 static void omap_hsmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
 	struct omap_hsmmc_host *host = mmc_priv(mmc);
-	u16 dsor = 0;
-	unsigned long regval;
-	unsigned long timeout;
-	u32 con;
 	int do_send_init_stream = 0;
 
-	mmc_host_enable(host->mmc);
+	pm_runtime_get_sync(host->dev);
 
 	if (ios->power_mode != host->power_mode) {
 		switch (ios->power_mode) {
@@ -1557,22 +1671,7 @@
 
 	/* FIXME: set registers based only on changes to ios */
 
-	con = OMAP_HSMMC_READ(host->base, CON);
-	switch (mmc->ios.bus_width) {
-	case MMC_BUS_WIDTH_8:
-		OMAP_HSMMC_WRITE(host->base, CON, con | DW8);
-		break;
-	case MMC_BUS_WIDTH_4:
-		OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8);
-		OMAP_HSMMC_WRITE(host->base, HCTL,
-			OMAP_HSMMC_READ(host->base, HCTL) | FOUR_BIT);
-		break;
-	case MMC_BUS_WIDTH_1:
-		OMAP_HSMMC_WRITE(host->base, CON, con & ~DW8);
-		OMAP_HSMMC_WRITE(host->base, HCTL,
-			OMAP_HSMMC_READ(host->base, HCTL) & ~FOUR_BIT);
-		break;
-	}
+	omap_hsmmc_set_bus_width(host);
 
 	if (host->pdata->controller_flags & OMAP_HSMMC_SUPPORTS_DUAL_VOLT) {
 		/* Only MMC1 can interface at 3V without some flavor
@@ -1592,47 +1691,14 @@
 		}
 	}
 
-	if (ios->clock) {
-		dsor = OMAP_MMC_MASTER_CLOCK / ios->clock;
-		if (dsor < 1)
-			dsor = 1;
-
-		if (OMAP_MMC_MASTER_CLOCK / dsor > ios->clock)
-			dsor++;
-
-		if (dsor > 250)
-			dsor = 250;
-	}
-	omap_hsmmc_stop_clock(host);
-	regval = OMAP_HSMMC_READ(host->base, SYSCTL);
-	regval = regval & ~(CLKD_MASK);
-	regval = regval | (dsor << 6) | (DTO << 16);
-	OMAP_HSMMC_WRITE(host->base, SYSCTL, regval);
-	OMAP_HSMMC_WRITE(host->base, SYSCTL,
-		OMAP_HSMMC_READ(host->base, SYSCTL) | ICE);
-
-	/* Wait till the ICS bit is set */
-	timeout = jiffies + msecs_to_jiffies(MMC_TIMEOUT_MS);
-	while ((OMAP_HSMMC_READ(host->base, SYSCTL) & ICS) != ICS
-		&& time_before(jiffies, timeout))
-		msleep(1);
-
-	OMAP_HSMMC_WRITE(host->base, SYSCTL,
-		OMAP_HSMMC_READ(host->base, SYSCTL) | CEN);
+	omap_hsmmc_set_clock(host);
 
 	if (do_send_init_stream)
 		send_init_stream(host);
 
-	con = OMAP_HSMMC_READ(host->base, CON);
-	if (ios->bus_mode == MMC_BUSMODE_OPENDRAIN)
-		OMAP_HSMMC_WRITE(host->base, CON, con | OD);
-	else
-		OMAP_HSMMC_WRITE(host->base, CON, con & ~OD);
+	omap_hsmmc_set_bus_mode(host);
 
-	if (host->power_mode == MMC_POWER_OFF)
-		mmc_host_disable(host->mmc);
-	else
-		mmc_host_lazy_disable(host->mmc);
+	pm_runtime_put_autosuspend(host->dev);
 }
 
 static int omap_hsmmc_get_cd(struct mmc_host *mmc)
@@ -1688,230 +1754,12 @@
 	set_sd_bus_power(host);
 }
 
-/*
- * Dynamic power saving handling, FSM:
- *   ENABLED -> DISABLED -> CARDSLEEP / REGSLEEP -> OFF
- *     ^___________|          |                      |
- *     |______________________|______________________|
- *
- * ENABLED:   mmc host is fully functional
- * DISABLED:  fclk is off
- * CARDSLEEP: fclk is off, card is asleep, voltage regulator is asleep
- * REGSLEEP:  fclk is off, voltage regulator is asleep
- * OFF:       fclk is off, voltage regulator is off
- *
- * Transition handlers return the timeout for the next state transition
- * or negative error.
- */
-
-enum {ENABLED = 0, DISABLED, CARDSLEEP, REGSLEEP, OFF};
-
-/* Handler for [ENABLED -> DISABLED] transition */
-static int omap_hsmmc_enabled_to_disabled(struct omap_hsmmc_host *host)
-{
-	omap_hsmmc_context_save(host);
-	clk_disable(host->fclk);
-	host->dpm_state = DISABLED;
-
-	dev_dbg(mmc_dev(host->mmc), "ENABLED -> DISABLED\n");
-
-	if (host->power_mode == MMC_POWER_OFF)
-		return 0;
-
-	return OMAP_MMC_SLEEP_TIMEOUT;
-}
-
-/* Handler for [DISABLED -> REGSLEEP / CARDSLEEP] transition */
-static int omap_hsmmc_disabled_to_sleep(struct omap_hsmmc_host *host)
-{
-	int err, new_state;
-
-	if (!mmc_try_claim_host(host->mmc))
-		return 0;
-
-	clk_enable(host->fclk);
-	omap_hsmmc_context_restore(host);
-	if (mmc_card_can_sleep(host->mmc)) {
-		err = mmc_card_sleep(host->mmc);
-		if (err < 0) {
-			clk_disable(host->fclk);
-			mmc_release_host(host->mmc);
-			return err;
-		}
-		new_state = CARDSLEEP;
-	} else {
-		new_state = REGSLEEP;
-	}
-	if (mmc_slot(host).set_sleep)
-		mmc_slot(host).set_sleep(host->dev, host->slot_id, 1, 0,
-					 new_state == CARDSLEEP);
-	/* FIXME: turn off bus power and perhaps interrupts too */
-	clk_disable(host->fclk);
-	host->dpm_state = new_state;
-
-	mmc_release_host(host->mmc);
-
-	dev_dbg(mmc_dev(host->mmc), "DISABLED -> %s\n",
-		host->dpm_state == CARDSLEEP ? "CARDSLEEP" : "REGSLEEP");
-
-	if (mmc_slot(host).no_off)
-		return 0;
-
-	if ((host->mmc->caps & MMC_CAP_NONREMOVABLE) ||
-	    mmc_slot(host).card_detect ||
-	    (mmc_slot(host).get_cover_state &&
-	     mmc_slot(host).get_cover_state(host->dev, host->slot_id)))
-		return OMAP_MMC_OFF_TIMEOUT;
-
-	return 0;
-}
-
-/* Handler for [REGSLEEP / CARDSLEEP -> OFF] transition */
-static int omap_hsmmc_sleep_to_off(struct omap_hsmmc_host *host)
-{
-	if (!mmc_try_claim_host(host->mmc))
-		return 0;
-
-	if (mmc_slot(host).no_off)
-		return 0;
-
-	if (!((host->mmc->caps & MMC_CAP_NONREMOVABLE) ||
-	      mmc_slot(host).card_detect ||
-	      (mmc_slot(host).get_cover_state &&
-	       mmc_slot(host).get_cover_state(host->dev, host->slot_id)))) {
-		mmc_release_host(host->mmc);
-		return 0;
-	}
-
-	mmc_slot(host).set_power(host->dev, host->slot_id, 0, 0);
-	host->vdd = 0;
-	host->power_mode = MMC_POWER_OFF;
-
-	dev_dbg(mmc_dev(host->mmc), "%s -> OFF\n",
-		host->dpm_state == CARDSLEEP ? "CARDSLEEP" : "REGSLEEP");
-
-	host->dpm_state = OFF;
-
-	mmc_release_host(host->mmc);
-
-	return 0;
-}
-
-/* Handler for [DISABLED -> ENABLED] transition */
-static int omap_hsmmc_disabled_to_enabled(struct omap_hsmmc_host *host)
-{
-	int err;
-
-	err = clk_enable(host->fclk);
-	if (err < 0)
-		return err;
-
-	omap_hsmmc_context_restore(host);
-	host->dpm_state = ENABLED;
-
-	dev_dbg(mmc_dev(host->mmc), "DISABLED -> ENABLED\n");
-
-	return 0;
-}
-
-/* Handler for [SLEEP -> ENABLED] transition */
-static int omap_hsmmc_sleep_to_enabled(struct omap_hsmmc_host *host)
-{
-	if (!mmc_try_claim_host(host->mmc))
-		return 0;
-
-	clk_enable(host->fclk);
-	omap_hsmmc_context_restore(host);
-	if (mmc_slot(host).set_sleep)
-		mmc_slot(host).set_sleep(host->dev, host->slot_id, 0,
-			 host->vdd, host->dpm_state == CARDSLEEP);
-	if (mmc_card_can_sleep(host->mmc))
-		mmc_card_awake(host->mmc);
-
-	dev_dbg(mmc_dev(host->mmc), "%s -> ENABLED\n",
-		host->dpm_state == CARDSLEEP ? "CARDSLEEP" : "REGSLEEP");
-
-	host->dpm_state = ENABLED;
-
-	mmc_release_host(host->mmc);
-
-	return 0;
-}
-
-/* Handler for [OFF -> ENABLED] transition */
-static int omap_hsmmc_off_to_enabled(struct omap_hsmmc_host *host)
-{
-	clk_enable(host->fclk);
-
-	omap_hsmmc_context_restore(host);
-	omap_hsmmc_conf_bus_power(host);
-	mmc_power_restore_host(host->mmc);
-
-	host->dpm_state = ENABLED;
-
-	dev_dbg(mmc_dev(host->mmc), "OFF -> ENABLED\n");
-
-	return 0;
-}
-
-/*
- * Bring MMC host to ENABLED from any other PM state.
- */
-static int omap_hsmmc_enable(struct mmc_host *mmc)
-{
-	struct omap_hsmmc_host *host = mmc_priv(mmc);
-
-	switch (host->dpm_state) {
-	case DISABLED:
-		return omap_hsmmc_disabled_to_enabled(host);
-	case CARDSLEEP:
-	case REGSLEEP:
-		return omap_hsmmc_sleep_to_enabled(host);
-	case OFF:
-		return omap_hsmmc_off_to_enabled(host);
-	default:
-		dev_dbg(mmc_dev(host->mmc), "UNKNOWN state\n");
-		return -EINVAL;
-	}
-}
-
-/*
- * Bring MMC host in PM state (one level deeper).
- */
-static int omap_hsmmc_disable(struct mmc_host *mmc, int lazy)
-{
-	struct omap_hsmmc_host *host = mmc_priv(mmc);
-
-	switch (host->dpm_state) {
-	case ENABLED: {
-		int delay;
-
-		delay = omap_hsmmc_enabled_to_disabled(host);
-		if (lazy || delay < 0)
-			return delay;
-		return 0;
-	}
-	case DISABLED:
-		return omap_hsmmc_disabled_to_sleep(host);
-	case CARDSLEEP:
-	case REGSLEEP:
-		return omap_hsmmc_sleep_to_off(host);
-	default:
-		dev_dbg(mmc_dev(host->mmc), "UNKNOWN state\n");
-		return -EINVAL;
-	}
-}
-
 static int omap_hsmmc_enable_fclk(struct mmc_host *mmc)
 {
 	struct omap_hsmmc_host *host = mmc_priv(mmc);
-	int err;
 
-	err = clk_enable(host->fclk);
-	if (err)
-		return err;
-	dev_dbg(mmc_dev(host->mmc), "mmc_fclk: enabled\n");
-	omap_hsmmc_context_restore(host);
+	pm_runtime_get_sync(host->dev);
+
 	return 0;
 }
 
@@ -1919,26 +1767,17 @@
 {
 	struct omap_hsmmc_host *host = mmc_priv(mmc);
 
-	omap_hsmmc_context_save(host);
-	clk_disable(host->fclk);
-	dev_dbg(mmc_dev(host->mmc), "mmc_fclk: disabled\n");
+	pm_runtime_mark_last_busy(host->dev);
+	pm_runtime_put_autosuspend(host->dev);
+
 	return 0;
 }
 
 static const struct mmc_host_ops omap_hsmmc_ops = {
 	.enable = omap_hsmmc_enable_fclk,
 	.disable = omap_hsmmc_disable_fclk,
-	.request = omap_hsmmc_request,
-	.set_ios = omap_hsmmc_set_ios,
-	.get_cd = omap_hsmmc_get_cd,
-	.get_ro = omap_hsmmc_get_ro,
-	.init_card = omap_hsmmc_init_card,
-	/* NYET -- enable_sdio_irq */
-};
-
-static const struct mmc_host_ops omap_hsmmc_ps_ops = {
-	.enable = omap_hsmmc_enable,
-	.disable = omap_hsmmc_disable,
+	.post_req = omap_hsmmc_post_req,
+	.pre_req = omap_hsmmc_pre_req,
 	.request = omap_hsmmc_request,
 	.set_ios = omap_hsmmc_set_ios,
 	.get_cd = omap_hsmmc_get_cd,
@@ -1968,15 +1807,12 @@
 			host->dpm_state, mmc->nesting_cnt,
 			host->context_loss, context_loss);
 
-	if (host->suspended || host->dpm_state == OFF) {
+	if (host->suspended) {
 		seq_printf(s, "host suspended, can't read registers\n");
 		return 0;
 	}
 
-	if (clk_enable(host->fclk) != 0) {
-		seq_printf(s, "can't read the regs\n");
-		return 0;
-	}
+	pm_runtime_get_sync(host->dev);
 
 	seq_printf(s, "SYSCONFIG:\t0x%08x\n",
 			OMAP_HSMMC_READ(host->base, SYSCONFIG));
@@ -1993,7 +1829,8 @@
 	seq_printf(s, "CAPA:\t\t0x%08x\n",
 			OMAP_HSMMC_READ(host->base, CAPA));
 
-	clk_disable(host->fclk);
+	pm_runtime_mark_last_busy(host->dev);
+	pm_runtime_put_autosuspend(host->dev);
 
 	return 0;
 }
@@ -2077,14 +1914,12 @@
 	host->mapbase	= res->start;
 	host->base	= ioremap(host->mapbase, SZ_4K);
 	host->power_mode = MMC_POWER_OFF;
+	host->next_data.cookie = 1;
 
 	platform_set_drvdata(pdev, host);
 	INIT_WORK(&host->mmc_carddetect_work, omap_hsmmc_detect);
 
-	if (mmc_slot(host).power_saving)
-		mmc->ops	= &omap_hsmmc_ps_ops;
-	else
-		mmc->ops	= &omap_hsmmc_ops;
+	mmc->ops	= &omap_hsmmc_ops;
 
 	/*
 	 * If regulator_disable can only put vcc_aux to sleep then there is
@@ -2093,44 +1928,26 @@
 	if (mmc_slot(host).vcc_aux_disable_is_sleep)
 		mmc_slot(host).no_off = 1;
 
-	mmc->f_min	= 400000;
-	mmc->f_max	= 52000000;
+	mmc->f_min	= OMAP_MMC_MIN_CLOCK;
+	mmc->f_max	= OMAP_MMC_MAX_CLOCK;
 
 	spin_lock_init(&host->irq_lock);
 
-	host->iclk = clk_get(&pdev->dev, "ick");
-	if (IS_ERR(host->iclk)) {
-		ret = PTR_ERR(host->iclk);
-		host->iclk = NULL;
-		goto err1;
-	}
 	host->fclk = clk_get(&pdev->dev, "fck");
 	if (IS_ERR(host->fclk)) {
 		ret = PTR_ERR(host->fclk);
 		host->fclk = NULL;
-		clk_put(host->iclk);
 		goto err1;
 	}
 
 	omap_hsmmc_context_save(host);
 
 	mmc->caps |= MMC_CAP_DISABLE;
-	mmc_set_disable_delay(mmc, OMAP_MMC_DISABLED_TIMEOUT);
-	/* we start off in DISABLED state */
-	host->dpm_state = DISABLED;
 
-	if (clk_enable(host->iclk) != 0) {
-		clk_put(host->iclk);
-		clk_put(host->fclk);
-		goto err1;
-	}
-
-	if (mmc_host_enable(host->mmc) != 0) {
-		clk_disable(host->iclk);
-		clk_put(host->iclk);
-		clk_put(host->fclk);
-		goto err1;
-	}
+	pm_runtime_enable(host->dev);
+	pm_runtime_get_sync(host->dev);
+	pm_runtime_set_autosuspend_delay(host->dev, MMC_AUTOSUSPEND_DELAY);
+	pm_runtime_use_autosuspend(host->dev);
 
 	if (cpu_is_omap2430()) {
 		host->dbclk = clk_get(&pdev->dev, "mmchsdb_fck");
@@ -2240,8 +2057,6 @@
 
 	omap_hsmmc_disable_irq(host);
 
-	mmc_host_lazy_disable(host->mmc);
-
 	omap_hsmmc_protect_card(host);
 
 	mmc_add_host(mmc);
@@ -2259,6 +2074,8 @@
 	}
 
 	omap_hsmmc_debugfs(mmc);
+	pm_runtime_mark_last_busy(host->dev);
+	pm_runtime_put_autosuspend(host->dev);
 
 	return 0;
 
@@ -2274,10 +2091,9 @@
 err_irq_cd_init:
 	free_irq(host->irq, host);
 err_irq:
-	mmc_host_disable(host->mmc);
-	clk_disable(host->iclk);
+	pm_runtime_mark_last_busy(host->dev);
+	pm_runtime_put_autosuspend(host->dev);
 	clk_put(host->fclk);
-	clk_put(host->iclk);
 	if (host->got_dbclk) {
 		clk_disable(host->dbclk);
 		clk_put(host->dbclk);
@@ -2299,7 +2115,7 @@
 	struct resource *res;
 
 	if (host) {
-		mmc_host_enable(host->mmc);
+		pm_runtime_get_sync(host->dev);
 		mmc_remove_host(host->mmc);
 		if (host->use_reg)
 			omap_hsmmc_reg_put(host);
@@ -2310,10 +2126,9 @@
 			free_irq(mmc_slot(host).card_detect_irq, host);
 		flush_work_sync(&host->mmc_carddetect_work);
 
-		mmc_host_disable(host->mmc);
-		clk_disable(host->iclk);
+		pm_runtime_put_sync(host->dev);
+		pm_runtime_disable(host->dev);
 		clk_put(host->fclk);
-		clk_put(host->iclk);
 		if (host->got_dbclk) {
 			clk_disable(host->dbclk);
 			clk_put(host->dbclk);
@@ -2343,6 +2158,7 @@
 		return 0;
 
 	if (host) {
+		pm_runtime_get_sync(host->dev);
 		host->suspended = 1;
 		if (host->pdata->suspend) {
 			ret = host->pdata->suspend(&pdev->dev,
@@ -2357,13 +2173,11 @@
 		}
 		cancel_work_sync(&host->mmc_carddetect_work);
 		ret = mmc_suspend_host(host->mmc);
-		mmc_host_enable(host->mmc);
+
 		if (ret == 0) {
 			omap_hsmmc_disable_irq(host);
 			OMAP_HSMMC_WRITE(host->base, HCTL,
 				OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
-			mmc_host_disable(host->mmc);
-			clk_disable(host->iclk);
 			if (host->got_dbclk)
 				clk_disable(host->dbclk);
 		} else {
@@ -2375,9 +2189,8 @@
 					dev_dbg(mmc_dev(host->mmc),
 						"Unmask interrupt failed\n");
 			}
-			mmc_host_disable(host->mmc);
 		}
-
+		pm_runtime_put_sync(host->dev);
 	}
 	return ret;
 }
@@ -2393,14 +2206,7 @@
 		return 0;
 
 	if (host) {
-		ret = clk_enable(host->iclk);
-		if (ret)
-			goto clk_en_err;
-
-		if (mmc_host_enable(host->mmc) != 0) {
-			clk_disable(host->iclk);
-			goto clk_en_err;
-		}
+		pm_runtime_get_sync(host->dev);
 
 		if (host->got_dbclk)
 			clk_enable(host->dbclk);
@@ -2421,15 +2227,12 @@
 		if (ret == 0)
 			host->suspended = 0;
 
-		mmc_host_lazy_disable(host->mmc);
+		pm_runtime_mark_last_busy(host->dev);
+		pm_runtime_put_autosuspend(host->dev);
 	}
 
 	return ret;
 
-clk_en_err:
-	dev_dbg(mmc_dev(host->mmc),
-		"Failed to enable MMC clocks during resume\n");
-	return ret;
 }
 
 #else
@@ -2437,9 +2240,33 @@
 #define omap_hsmmc_resume		NULL
 #endif
 
+static int omap_hsmmc_runtime_suspend(struct device *dev)
+{
+	struct omap_hsmmc_host *host;
+
+	host = platform_get_drvdata(to_platform_device(dev));
+	omap_hsmmc_context_save(host);
+	dev_dbg(mmc_dev(host->mmc), "disabled\n");
+
+	return 0;
+}
+
+static int omap_hsmmc_runtime_resume(struct device *dev)
+{
+	struct omap_hsmmc_host *host;
+
+	host = platform_get_drvdata(to_platform_device(dev));
+	omap_hsmmc_context_restore(host);
+	dev_dbg(mmc_dev(host->mmc), "enabled\n");
+
+	return 0;
+}
+
 static struct dev_pm_ops omap_hsmmc_dev_pm_ops = {
 	.suspend	= omap_hsmmc_suspend,
 	.resume		= omap_hsmmc_resume,
+	.runtime_suspend = omap_hsmmc_runtime_suspend,
+	.runtime_resume = omap_hsmmc_runtime_resume,
 };
 
 static struct platform_driver omap_hsmmc_driver = {
diff --git a/drivers/mmc/host/sdhci-cns3xxx.c b/drivers/mmc/host/sdhci-cns3xxx.c
index 9ebd1d7..4b920b7 100644
--- a/drivers/mmc/host/sdhci-cns3xxx.c
+++ b/drivers/mmc/host/sdhci-cns3xxx.c
@@ -15,9 +15,7 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/mmc/host.h>
-#include <linux/mmc/sdhci-pltfm.h>
 #include <mach/cns3xxx.h>
-#include "sdhci.h"
 #include "sdhci-pltfm.h"
 
 static unsigned int sdhci_cns3xxx_get_max_clk(struct sdhci_host *host)
@@ -86,7 +84,7 @@
 	.set_clock	= sdhci_cns3xxx_set_clock,
 };
 
-struct sdhci_pltfm_data sdhci_cns3xxx_pdata = {
+static struct sdhci_pltfm_data sdhci_cns3xxx_pdata = {
 	.ops = &sdhci_cns3xxx_ops,
 	.quirks = SDHCI_QUIRK_BROKEN_DMA |
 		  SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
@@ -95,3 +93,43 @@
 		  SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
 		  SDHCI_QUIRK_NONSTANDARD_CLOCK,
 };
+
+static int __devinit sdhci_cns3xxx_probe(struct platform_device *pdev)
+{
+	return sdhci_pltfm_register(pdev, &sdhci_cns3xxx_pdata);
+}
+
+static int __devexit sdhci_cns3xxx_remove(struct platform_device *pdev)
+{
+	return sdhci_pltfm_unregister(pdev);
+}
+
+static struct platform_driver sdhci_cns3xxx_driver = {
+	.driver		= {
+		.name	= "sdhci-cns3xxx",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= sdhci_cns3xxx_probe,
+	.remove		= __devexit_p(sdhci_cns3xxx_remove),
+#ifdef CONFIG_PM
+	.suspend	= sdhci_pltfm_suspend,
+	.resume		= sdhci_pltfm_resume,
+#endif
+};
+
+static int __init sdhci_cns3xxx_init(void)
+{
+	return platform_driver_register(&sdhci_cns3xxx_driver);
+}
+module_init(sdhci_cns3xxx_init);
+
+static void __exit sdhci_cns3xxx_exit(void)
+{
+	platform_driver_unregister(&sdhci_cns3xxx_driver);
+}
+module_exit(sdhci_cns3xxx_exit);
+
+MODULE_DESCRIPTION("SDHCI driver for CNS3xxx");
+MODULE_AUTHOR("Scott Shu, "
+	      "Anton Vorontsov <avorontsov@mvista.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/sdhci-dove.c b/drivers/mmc/host/sdhci-dove.c
index 2aeef4f..f2d29dc 100644
--- a/drivers/mmc/host/sdhci-dove.c
+++ b/drivers/mmc/host/sdhci-dove.c
@@ -22,7 +22,6 @@
 #include <linux/io.h>
 #include <linux/mmc/host.h>
 
-#include "sdhci.h"
 #include "sdhci-pltfm.h"
 
 static u16 sdhci_dove_readw(struct sdhci_host *host, int reg)
@@ -61,10 +60,50 @@
 	.read_l	= sdhci_dove_readl,
 };
 
-struct sdhci_pltfm_data sdhci_dove_pdata = {
+static struct sdhci_pltfm_data sdhci_dove_pdata = {
 	.ops	= &sdhci_dove_ops,
 	.quirks	= SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER |
 		  SDHCI_QUIRK_NO_BUSY_IRQ |
 		  SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
 		  SDHCI_QUIRK_FORCE_DMA,
 };
+
+static int __devinit sdhci_dove_probe(struct platform_device *pdev)
+{
+	return sdhci_pltfm_register(pdev, &sdhci_dove_pdata);
+}
+
+static int __devexit sdhci_dove_remove(struct platform_device *pdev)
+{
+	return sdhci_pltfm_unregister(pdev);
+}
+
+static struct platform_driver sdhci_dove_driver = {
+	.driver		= {
+		.name	= "sdhci-dove",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= sdhci_dove_probe,
+	.remove		= __devexit_p(sdhci_dove_remove),
+#ifdef CONFIG_PM
+	.suspend	= sdhci_pltfm_suspend,
+	.resume		= sdhci_pltfm_resume,
+#endif
+};
+
+static int __init sdhci_dove_init(void)
+{
+	return platform_driver_register(&sdhci_dove_driver);
+}
+module_init(sdhci_dove_init);
+
+static void __exit sdhci_dove_exit(void)
+{
+	platform_driver_unregister(&sdhci_dove_driver);
+}
+module_exit(sdhci_dove_exit);
+
+MODULE_DESCRIPTION("SDHCI driver for Dove");
+MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>, "
+	      "Mike Rapoport <mike@compulab.co.il>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index a19967d..710b706 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -18,12 +18,10 @@
 #include <linux/gpio.h>
 #include <linux/slab.h>
 #include <linux/mmc/host.h>
-#include <linux/mmc/sdhci-pltfm.h>
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/sdio.h>
 #include <mach/hardware.h>
 #include <mach/esdhc.h>
-#include "sdhci.h"
 #include "sdhci-pltfm.h"
 #include "sdhci-esdhc.h"
 
@@ -31,7 +29,7 @@
 #define SDHCI_VENDOR_SPEC		0xC0
 #define  SDHCI_VENDOR_SPEC_SDIO_QUIRK	0x00000002
 
-#define ESDHC_FLAG_GPIO_FOR_CD_WP	(1 << 0)
+#define ESDHC_FLAG_GPIO_FOR_CD		(1 << 0)
 /*
  * The CMDTYPE of the CMD register (offset 0xE) should be set to
  * "11" when the STOP CMD12 is issued on imx53 to abort one
@@ -67,14 +65,14 @@
 	u32 val = readl(host->ioaddr + reg);
 
 	if (unlikely((reg == SDHCI_PRESENT_STATE)
-			&& (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD_WP))) {
+			&& (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD))) {
 		struct esdhc_platform_data *boarddata =
 				host->mmc->parent->platform_data;
 
 		if (boarddata && gpio_is_valid(boarddata->cd_gpio)
 				&& gpio_get_value(boarddata->cd_gpio))
 			/* no card, if a valid gpio says so... */
-			val &= SDHCI_CARD_PRESENT;
+			val &= ~SDHCI_CARD_PRESENT;
 		else
 			/* ... in all other cases assume card is present */
 			val |= SDHCI_CARD_PRESENT;
@@ -89,7 +87,7 @@
 	struct pltfm_imx_data *imx_data = pltfm_host->priv;
 
 	if (unlikely((reg == SDHCI_INT_ENABLE || reg == SDHCI_SIGNAL_ENABLE)
-			&& (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD_WP)))
+			&& (imx_data->flags & ESDHC_FLAG_GPIO_FOR_CD)))
 		/*
 		 * these interrupts won't work with a custom card_detect gpio
 		 * (only applied to mx25/35)
@@ -191,16 +189,6 @@
 	return clk_get_rate(pltfm_host->clk) / 256 / 16;
 }
 
-static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host)
-{
-	struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data;
-
-	if (boarddata && gpio_is_valid(boarddata->wp_gpio))
-		return gpio_get_value(boarddata->wp_gpio);
-	else
-		return -ENOSYS;
-}
-
 static struct sdhci_ops sdhci_esdhc_ops = {
 	.read_l = esdhc_readl_le,
 	.read_w = esdhc_readw_le,
@@ -212,6 +200,24 @@
 	.get_min_clock = esdhc_pltfm_get_min_clock,
 };
 
+static struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = {
+	.quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_ADMA
+			| SDHCI_QUIRK_BROKEN_CARD_DETECTION,
+	/* ADMA has issues. Might be fixable */
+	.ops = &sdhci_esdhc_ops,
+};
+
+static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host)
+{
+	struct esdhc_platform_data *boarddata =
+			host->mmc->parent->platform_data;
+
+	if (boarddata && gpio_is_valid(boarddata->wp_gpio))
+		return gpio_get_value(boarddata->wp_gpio);
+	else
+		return -ENOSYS;
+}
+
 static irqreturn_t cd_irq(int irq, void *data)
 {
 	struct sdhci_host *sdhost = (struct sdhci_host *)data;
@@ -220,30 +226,35 @@
 	return IRQ_HANDLED;
 };
 
-static int esdhc_pltfm_init(struct sdhci_host *host, struct sdhci_pltfm_data *pdata)
+static int __devinit sdhci_esdhc_imx_probe(struct platform_device *pdev)
 {
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data;
+	struct sdhci_pltfm_host *pltfm_host;
+	struct sdhci_host *host;
+	struct esdhc_platform_data *boarddata;
 	struct clk *clk;
 	int err;
 	struct pltfm_imx_data *imx_data;
 
+	host = sdhci_pltfm_init(pdev, &sdhci_esdhc_imx_pdata);
+	if (IS_ERR(host))
+		return PTR_ERR(host);
+
+	pltfm_host = sdhci_priv(host);
+
+	imx_data = kzalloc(sizeof(struct pltfm_imx_data), GFP_KERNEL);
+	if (!imx_data)
+		return -ENOMEM;
+	pltfm_host->priv = imx_data;
+
 	clk = clk_get(mmc_dev(host->mmc), NULL);
 	if (IS_ERR(clk)) {
 		dev_err(mmc_dev(host->mmc), "clk err\n");
-		return PTR_ERR(clk);
+		err = PTR_ERR(clk);
+		goto err_clk_get;
 	}
 	clk_enable(clk);
 	pltfm_host->clk = clk;
 
-	imx_data = kzalloc(sizeof(struct pltfm_imx_data), GFP_KERNEL);
-	if (!imx_data) {
-		clk_disable(pltfm_host->clk);
-		clk_put(pltfm_host->clk);
-		return -ENOMEM;
-	}
-	pltfm_host->priv = imx_data;
-
 	if (!cpu_is_mx25())
 		host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
 
@@ -257,6 +268,7 @@
 	if (!(cpu_is_mx25() || cpu_is_mx35() || cpu_is_mx51()))
 		imx_data->flags |= ESDHC_FLAG_MULTIBLK_NO_INT;
 
+	boarddata = host->mmc->parent->platform_data;
 	if (boarddata) {
 		err = gpio_request_one(boarddata->wp_gpio, GPIOF_IN, "ESDHC_WP");
 		if (err) {
@@ -284,11 +296,15 @@
 			goto no_card_detect_irq;
 		}
 
-		imx_data->flags |= ESDHC_FLAG_GPIO_FOR_CD_WP;
+		imx_data->flags |= ESDHC_FLAG_GPIO_FOR_CD;
 		/* Now we have a working card_detect again */
 		host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION;
 	}
 
+	err = sdhci_add_host(host);
+	if (err)
+		goto err_add_host;
+
 	return 0;
 
  no_card_detect_irq:
@@ -297,14 +313,23 @@
 	boarddata->cd_gpio = err;
  not_supported:
 	kfree(imx_data);
-	return 0;
+ err_add_host:
+	clk_disable(pltfm_host->clk);
+	clk_put(pltfm_host->clk);
+ err_clk_get:
+	sdhci_pltfm_free(pdev);
+	return err;
 }
 
-static void esdhc_pltfm_exit(struct sdhci_host *host)
+static int __devexit sdhci_esdhc_imx_remove(struct platform_device *pdev)
 {
+	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct esdhc_platform_data *boarddata = host->mmc->parent->platform_data;
 	struct pltfm_imx_data *imx_data = pltfm_host->priv;
+	int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
+
+	sdhci_remove_host(host, dead);
 
 	if (boarddata && gpio_is_valid(boarddata->wp_gpio))
 		gpio_free(boarddata->wp_gpio);
@@ -319,13 +344,37 @@
 	clk_disable(pltfm_host->clk);
 	clk_put(pltfm_host->clk);
 	kfree(imx_data);
+
+	sdhci_pltfm_free(pdev);
+
+	return 0;
 }
 
-struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = {
-	.quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_ADMA
-			| SDHCI_QUIRK_BROKEN_CARD_DETECTION,
-	/* ADMA has issues. Might be fixable */
-	.ops = &sdhci_esdhc_ops,
-	.init = esdhc_pltfm_init,
-	.exit = esdhc_pltfm_exit,
+static struct platform_driver sdhci_esdhc_imx_driver = {
+	.driver		= {
+		.name	= "sdhci-esdhc-imx",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= sdhci_esdhc_imx_probe,
+	.remove		= __devexit_p(sdhci_esdhc_imx_remove),
+#ifdef CONFIG_PM
+	.suspend	= sdhci_pltfm_suspend,
+	.resume		= sdhci_pltfm_resume,
+#endif
 };
+
+static int __init sdhci_esdhc_imx_init(void)
+{
+	return platform_driver_register(&sdhci_esdhc_imx_driver);
+}
+module_init(sdhci_esdhc_imx_init);
+
+static void __exit sdhci_esdhc_imx_exit(void)
+{
+	platform_driver_unregister(&sdhci_esdhc_imx_driver);
+}
+module_exit(sdhci_esdhc_imx_exit);
+
+MODULE_DESCRIPTION("SDHCI driver for Freescale i.MX eSDHC");
+MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/sdhci-of-core.c b/drivers/mmc/host/sdhci-of-core.c
deleted file mode 100644
index 60e4186..0000000
--- a/drivers/mmc/host/sdhci-of-core.c
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * OpenFirmware bindings for Secure Digital Host Controller Interface.
- *
- * Copyright (c) 2007 Freescale Semiconductor, Inc.
- * Copyright (c) 2009 MontaVista Software, Inc.
- *
- * Authors: Xiaobo Xie <X.Xie@freescale.com>
- *	    Anton Vorontsov <avorontsov@ru.mvista.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- */
-
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/of.h>
-#include <linux/of_platform.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/mmc/host.h>
-#ifdef CONFIG_PPC
-#include <asm/machdep.h>
-#endif
-#include "sdhci-of.h"
-#include "sdhci.h"
-
-#ifdef CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
-
-/*
- * These accessors are designed for big endian hosts doing I/O to
- * little endian controllers incorporating a 32-bit hardware byte swapper.
- */
-
-u32 sdhci_be32bs_readl(struct sdhci_host *host, int reg)
-{
-	return in_be32(host->ioaddr + reg);
-}
-
-u16 sdhci_be32bs_readw(struct sdhci_host *host, int reg)
-{
-	return in_be16(host->ioaddr + (reg ^ 0x2));
-}
-
-u8 sdhci_be32bs_readb(struct sdhci_host *host, int reg)
-{
-	return in_8(host->ioaddr + (reg ^ 0x3));
-}
-
-void sdhci_be32bs_writel(struct sdhci_host *host, u32 val, int reg)
-{
-	out_be32(host->ioaddr + reg, val);
-}
-
-void sdhci_be32bs_writew(struct sdhci_host *host, u16 val, int reg)
-{
-	struct sdhci_of_host *of_host = sdhci_priv(host);
-	int base = reg & ~0x3;
-	int shift = (reg & 0x2) * 8;
-
-	switch (reg) {
-	case SDHCI_TRANSFER_MODE:
-		/*
-		 * Postpone this write, we must do it together with a
-		 * command write that is down below.
-		 */
-		of_host->xfer_mode_shadow = val;
-		return;
-	case SDHCI_COMMAND:
-		sdhci_be32bs_writel(host, val << 16 | of_host->xfer_mode_shadow,
-				    SDHCI_TRANSFER_MODE);
-		return;
-	}
-	clrsetbits_be32(host->ioaddr + base, 0xffff << shift, val << shift);
-}
-
-void sdhci_be32bs_writeb(struct sdhci_host *host, u8 val, int reg)
-{
-	int base = reg & ~0x3;
-	int shift = (reg & 0x3) * 8;
-
-	clrsetbits_be32(host->ioaddr + base , 0xff << shift, val << shift);
-}
-#endif /* CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER */
-
-#ifdef CONFIG_PM
-
-static int sdhci_of_suspend(struct platform_device *ofdev, pm_message_t state)
-{
-	struct sdhci_host *host = dev_get_drvdata(&ofdev->dev);
-
-	return mmc_suspend_host(host->mmc);
-}
-
-static int sdhci_of_resume(struct platform_device *ofdev)
-{
-	struct sdhci_host *host = dev_get_drvdata(&ofdev->dev);
-
-	return mmc_resume_host(host->mmc);
-}
-
-#else
-
-#define sdhci_of_suspend NULL
-#define sdhci_of_resume NULL
-
-#endif
-
-static bool __devinit sdhci_of_wp_inverted(struct device_node *np)
-{
-	if (of_get_property(np, "sdhci,wp-inverted", NULL))
-		return true;
-
-	/* Old device trees don't have the wp-inverted property. */
-#ifdef CONFIG_PPC
-	return machine_is(mpc837x_rdb) || machine_is(mpc837x_mds);
-#else
-	return false;
-#endif
-}
-
-static const struct of_device_id sdhci_of_match[];
-static int __devinit sdhci_of_probe(struct platform_device *ofdev)
-{
-	const struct of_device_id *match;
-	struct device_node *np = ofdev->dev.of_node;
-	struct sdhci_of_data *sdhci_of_data;
-	struct sdhci_host *host;
-	struct sdhci_of_host *of_host;
-	const __be32 *clk;
-	int size;
-	int ret;
-
-	match = of_match_device(sdhci_of_match, &ofdev->dev);
-	if (!match)
-		return -EINVAL;
-	sdhci_of_data = match->data;
-
-	if (!of_device_is_available(np))
-		return -ENODEV;
-
-	host = sdhci_alloc_host(&ofdev->dev, sizeof(*of_host));
-	if (IS_ERR(host))
-		return -ENOMEM;
-
-	of_host = sdhci_priv(host);
-	dev_set_drvdata(&ofdev->dev, host);
-
-	host->ioaddr = of_iomap(np, 0);
-	if (!host->ioaddr) {
-		ret = -ENOMEM;
-		goto err_addr_map;
-	}
-
-	host->irq = irq_of_parse_and_map(np, 0);
-	if (!host->irq) {
-		ret = -EINVAL;
-		goto err_no_irq;
-	}
-
-	host->hw_name = dev_name(&ofdev->dev);
-	if (sdhci_of_data) {
-		host->quirks = sdhci_of_data->quirks;
-		host->ops = &sdhci_of_data->ops;
-	}
-
-	if (of_get_property(np, "sdhci,auto-cmd12", NULL))
-		host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12;
-
-
-	if (of_get_property(np, "sdhci,1-bit-only", NULL))
-		host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA;
-
-	if (sdhci_of_wp_inverted(np))
-		host->quirks |= SDHCI_QUIRK_INVERTED_WRITE_PROTECT;
-
-	clk = of_get_property(np, "clock-frequency", &size);
-	if (clk && size == sizeof(*clk) && *clk)
-		of_host->clock = be32_to_cpup(clk);
-
-	ret = sdhci_add_host(host);
-	if (ret)
-		goto err_add_host;
-
-	return 0;
-
-err_add_host:
-	irq_dispose_mapping(host->irq);
-err_no_irq:
-	iounmap(host->ioaddr);
-err_addr_map:
-	sdhci_free_host(host);
-	return ret;
-}
-
-static int __devexit sdhci_of_remove(struct platform_device *ofdev)
-{
-	struct sdhci_host *host = dev_get_drvdata(&ofdev->dev);
-
-	sdhci_remove_host(host, 0);
-	sdhci_free_host(host);
-	irq_dispose_mapping(host->irq);
-	iounmap(host->ioaddr);
-	return 0;
-}
-
-static const struct of_device_id sdhci_of_match[] = {
-#ifdef CONFIG_MMC_SDHCI_OF_ESDHC
-	{ .compatible = "fsl,mpc8379-esdhc", .data = &sdhci_esdhc, },
-	{ .compatible = "fsl,mpc8536-esdhc", .data = &sdhci_esdhc, },
-	{ .compatible = "fsl,esdhc", .data = &sdhci_esdhc, },
-#endif
-#ifdef CONFIG_MMC_SDHCI_OF_HLWD
-	{ .compatible = "nintendo,hollywood-sdhci", .data = &sdhci_hlwd, },
-#endif
-	{ .compatible = "generic-sdhci", },
-	{},
-};
-MODULE_DEVICE_TABLE(of, sdhci_of_match);
-
-static struct platform_driver sdhci_of_driver = {
-	.driver = {
-		.name = "sdhci-of",
-		.owner = THIS_MODULE,
-		.of_match_table = sdhci_of_match,
-	},
-	.probe = sdhci_of_probe,
-	.remove = __devexit_p(sdhci_of_remove),
-	.suspend = sdhci_of_suspend,
-	.resume	= sdhci_of_resume,
-};
-
-static int __init sdhci_of_init(void)
-{
-	return platform_driver_register(&sdhci_of_driver);
-}
-module_init(sdhci_of_init);
-
-static void __exit sdhci_of_exit(void)
-{
-	platform_driver_unregister(&sdhci_of_driver);
-}
-module_exit(sdhci_of_exit);
-
-MODULE_DESCRIPTION("Secure Digital Host Controller Interface OF driver");
-MODULE_AUTHOR("Xiaobo Xie <X.Xie@freescale.com>, "
-	      "Anton Vorontsov <avorontsov@ru.mvista.com>");
-MODULE_LICENSE("GPL");
diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index ba40d6d..fe604df 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c
@@ -16,8 +16,7 @@
 #include <linux/io.h>
 #include <linux/delay.h>
 #include <linux/mmc/host.h>
-#include "sdhci-of.h"
-#include "sdhci.h"
+#include "sdhci-pltfm.h"
 #include "sdhci-esdhc.h"
 
 static u16 esdhc_readw(struct sdhci_host *host, int reg)
@@ -60,32 +59,83 @@
 
 static unsigned int esdhc_of_get_max_clock(struct sdhci_host *host)
 {
-	struct sdhci_of_host *of_host = sdhci_priv(host);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 
-	return of_host->clock;
+	return pltfm_host->clock;
 }
 
 static unsigned int esdhc_of_get_min_clock(struct sdhci_host *host)
 {
-	struct sdhci_of_host *of_host = sdhci_priv(host);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 
-	return of_host->clock / 256 / 16;
+	return pltfm_host->clock / 256 / 16;
 }
 
-struct sdhci_of_data sdhci_esdhc = {
+static struct sdhci_ops sdhci_esdhc_ops = {
+	.read_l = sdhci_be32bs_readl,
+	.read_w = esdhc_readw,
+	.read_b = sdhci_be32bs_readb,
+	.write_l = sdhci_be32bs_writel,
+	.write_w = esdhc_writew,
+	.write_b = esdhc_writeb,
+	.set_clock = esdhc_set_clock,
+	.enable_dma = esdhc_of_enable_dma,
+	.get_max_clock = esdhc_of_get_max_clock,
+	.get_min_clock = esdhc_of_get_min_clock,
+};
+
+static struct sdhci_pltfm_data sdhci_esdhc_pdata = {
 	/* card detection could be handled via GPIO */
 	.quirks = ESDHC_DEFAULT_QUIRKS | SDHCI_QUIRK_BROKEN_CARD_DETECTION
 		| SDHCI_QUIRK_NO_CARD_NO_RESET,
-	.ops = {
-		.read_l = sdhci_be32bs_readl,
-		.read_w = esdhc_readw,
-		.read_b = sdhci_be32bs_readb,
-		.write_l = sdhci_be32bs_writel,
-		.write_w = esdhc_writew,
-		.write_b = esdhc_writeb,
-		.set_clock = esdhc_set_clock,
-		.enable_dma = esdhc_of_enable_dma,
-		.get_max_clock = esdhc_of_get_max_clock,
-		.get_min_clock = esdhc_of_get_min_clock,
-	},
+	.ops = &sdhci_esdhc_ops,
 };
+
+static int __devinit sdhci_esdhc_probe(struct platform_device *pdev)
+{
+	return sdhci_pltfm_register(pdev, &sdhci_esdhc_pdata);
+}
+
+static int __devexit sdhci_esdhc_remove(struct platform_device *pdev)
+{
+	return sdhci_pltfm_unregister(pdev);
+}
+
+static const struct of_device_id sdhci_esdhc_of_match[] = {
+	{ .compatible = "fsl,mpc8379-esdhc" },
+	{ .compatible = "fsl,mpc8536-esdhc" },
+	{ .compatible = "fsl,esdhc" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, sdhci_esdhc_of_match);
+
+static struct platform_driver sdhci_esdhc_driver = {
+	.driver = {
+		.name = "sdhci-esdhc",
+		.owner = THIS_MODULE,
+		.of_match_table = sdhci_esdhc_of_match,
+	},
+	.probe = sdhci_esdhc_probe,
+	.remove = __devexit_p(sdhci_esdhc_remove),
+#ifdef CONFIG_PM
+	.suspend = sdhci_pltfm_suspend,
+	.resume = sdhci_pltfm_resume,
+#endif
+};
+
+static int __init sdhci_esdhc_init(void)
+{
+	return platform_driver_register(&sdhci_esdhc_driver);
+}
+module_init(sdhci_esdhc_init);
+
+static void __exit sdhci_esdhc_exit(void)
+{
+	platform_driver_unregister(&sdhci_esdhc_driver);
+}
+module_exit(sdhci_esdhc_exit);
+
+MODULE_DESCRIPTION("SDHCI OF driver for Freescale MPC eSDHC");
+MODULE_AUTHOR("Xiaobo Xie <X.Xie@freescale.com>, "
+	      "Anton Vorontsov <avorontsov@ru.mvista.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/sdhci-of-hlwd.c b/drivers/mmc/host/sdhci-of-hlwd.c
index 68ddb75..735be13 100644
--- a/drivers/mmc/host/sdhci-of-hlwd.c
+++ b/drivers/mmc/host/sdhci-of-hlwd.c
@@ -21,8 +21,7 @@
 
 #include <linux/delay.h>
 #include <linux/mmc/host.h>
-#include "sdhci-of.h"
-#include "sdhci.h"
+#include "sdhci-pltfm.h"
 
 /*
  * Ops and quirks for the Nintendo Wii SDHCI controllers.
@@ -51,15 +50,63 @@
 	udelay(SDHCI_HLWD_WRITE_DELAY);
 }
 
-struct sdhci_of_data sdhci_hlwd = {
+static struct sdhci_ops sdhci_hlwd_ops = {
+	.read_l = sdhci_be32bs_readl,
+	.read_w = sdhci_be32bs_readw,
+	.read_b = sdhci_be32bs_readb,
+	.write_l = sdhci_hlwd_writel,
+	.write_w = sdhci_hlwd_writew,
+	.write_b = sdhci_hlwd_writeb,
+};
+
+static struct sdhci_pltfm_data sdhci_hlwd_pdata = {
 	.quirks = SDHCI_QUIRK_32BIT_DMA_ADDR |
 		  SDHCI_QUIRK_32BIT_DMA_SIZE,
-	.ops = {
-		.read_l = sdhci_be32bs_readl,
-		.read_w = sdhci_be32bs_readw,
-		.read_b = sdhci_be32bs_readb,
-		.write_l = sdhci_hlwd_writel,
-		.write_w = sdhci_hlwd_writew,
-		.write_b = sdhci_hlwd_writeb,
-	},
+	.ops = &sdhci_hlwd_ops,
 };
+
+static int __devinit sdhci_hlwd_probe(struct platform_device *pdev)
+{
+	return sdhci_pltfm_register(pdev, &sdhci_hlwd_pdata);
+}
+
+static int __devexit sdhci_hlwd_remove(struct platform_device *pdev)
+{
+	return sdhci_pltfm_unregister(pdev);
+}
+
+static const struct of_device_id sdhci_hlwd_of_match[] = {
+	{ .compatible = "nintendo,hollywood-sdhci" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, sdhci_hlwd_of_match);
+
+static struct platform_driver sdhci_hlwd_driver = {
+	.driver = {
+		.name = "sdhci-hlwd",
+		.owner = THIS_MODULE,
+		.of_match_table = sdhci_hlwd_of_match,
+	},
+	.probe = sdhci_hlwd_probe,
+	.remove = __devexit_p(sdhci_hlwd_remove),
+#ifdef CONFIG_PM
+	.suspend = sdhci_pltfm_suspend,
+	.resume = sdhci_pltfm_resume,
+#endif
+};
+
+static int __init sdhci_hlwd_init(void)
+{
+	return platform_driver_register(&sdhci_hlwd_driver);
+}
+module_init(sdhci_hlwd_init);
+
+static void __exit sdhci_hlwd_exit(void)
+{
+	platform_driver_unregister(&sdhci_hlwd_driver);
+}
+module_exit(sdhci_hlwd_exit);
+
+MODULE_DESCRIPTION("Nintendo Wii SDHCI OF driver");
+MODULE_AUTHOR("The GameCube Linux Team, Albert Herranz");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/sdhci-of.h b/drivers/mmc/host/sdhci-of.h
deleted file mode 100644
index ad09ad9..0000000
--- a/drivers/mmc/host/sdhci-of.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * OpenFirmware bindings for Secure Digital Host Controller Interface.
- *
- * Copyright (c) 2007 Freescale Semiconductor, Inc.
- * Copyright (c) 2009 MontaVista Software, Inc.
- *
- * Authors: Xiaobo Xie <X.Xie@freescale.com>
- *	    Anton Vorontsov <avorontsov@ru.mvista.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- */
-
-#ifndef __SDHCI_OF_H
-#define __SDHCI_OF_H
-
-#include <linux/types.h>
-#include "sdhci.h"
-
-struct sdhci_of_data {
-	unsigned int quirks;
-	struct sdhci_ops ops;
-};
-
-struct sdhci_of_host {
-	unsigned int clock;
-	u16 xfer_mode_shadow;
-};
-
-extern u32 sdhci_be32bs_readl(struct sdhci_host *host, int reg);
-extern u16 sdhci_be32bs_readw(struct sdhci_host *host, int reg);
-extern u8 sdhci_be32bs_readb(struct sdhci_host *host, int reg);
-extern void sdhci_be32bs_writel(struct sdhci_host *host, u32 val, int reg);
-extern void sdhci_be32bs_writew(struct sdhci_host *host, u16 val, int reg);
-extern void sdhci_be32bs_writeb(struct sdhci_host *host, u8 val, int reg);
-
-extern struct sdhci_of_data sdhci_esdhc;
-extern struct sdhci_of_data sdhci_hlwd;
-
-#endif /* __SDHCI_OF_H */
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 936bbca..26c5286 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -143,6 +143,12 @@
 			  SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
 };
 
+static int mrst_hc_probe_slot(struct sdhci_pci_slot *slot)
+{
+	slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA;
+	return 0;
+}
+
 /*
  * ADMA operation is disabled for Moorestown platform due to
  * hardware bugs.
@@ -157,8 +163,15 @@
 	return 0;
 }
 
+static int mfd_emmc_probe_slot(struct sdhci_pci_slot *slot)
+{
+	slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA;
+	return 0;
+}
+
 static const struct sdhci_pci_fixes sdhci_intel_mrst_hc0 = {
 	.quirks		= SDHCI_QUIRK_BROKEN_ADMA | SDHCI_QUIRK_NO_HISPD_BIT,
+	.probe_slot	= mrst_hc_probe_slot,
 };
 
 static const struct sdhci_pci_fixes sdhci_intel_mrst_hc1_hc2 = {
@@ -170,10 +183,15 @@
 	.quirks		= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
 };
 
-static const struct sdhci_pci_fixes sdhci_intel_mfd_emmc_sdio = {
+static const struct sdhci_pci_fixes sdhci_intel_mfd_sdio = {
 	.quirks		= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
 };
 
+static const struct sdhci_pci_fixes sdhci_intel_mfd_emmc = {
+	.quirks		= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
+	.probe_slot	= mfd_emmc_probe_slot,
+};
+
 /* O2Micro extra registers */
 #define O2_SD_LOCK_WP		0xD3
 #define O2_SD_MULTI_VCC3V	0xEE
@@ -682,7 +700,7 @@
 		.device		= PCI_DEVICE_ID_INTEL_MFD_SDIO1,
 		.subvendor	= PCI_ANY_ID,
 		.subdevice	= PCI_ANY_ID,
-		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_sdio,
 	},
 
 	{
@@ -690,7 +708,7 @@
 		.device		= PCI_DEVICE_ID_INTEL_MFD_SDIO2,
 		.subvendor	= PCI_ANY_ID,
 		.subdevice	= PCI_ANY_ID,
-		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_sdio,
 	},
 
 	{
@@ -698,7 +716,7 @@
 		.device		= PCI_DEVICE_ID_INTEL_MFD_EMMC0,
 		.subvendor	= PCI_ANY_ID,
 		.subdevice	= PCI_ANY_ID,
-		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_emmc,
 	},
 
 	{
@@ -706,7 +724,7 @@
 		.device		= PCI_DEVICE_ID_INTEL_MFD_EMMC1,
 		.subvendor	= PCI_ANY_ID,
 		.subdevice	= PCI_ANY_ID,
-		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_emmc_sdio,
+		.driver_data	= (kernel_ulong_t)&sdhci_intel_mfd_emmc,
 	},
 
 	{
@@ -789,8 +807,34 @@
 	return 0;
 }
 
+static int sdhci_pci_8bit_width(struct sdhci_host *host, int width)
+{
+	u8 ctrl;
+
+	ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
+
+	switch (width) {
+	case MMC_BUS_WIDTH_8:
+		ctrl |= SDHCI_CTRL_8BITBUS;
+		ctrl &= ~SDHCI_CTRL_4BITBUS;
+		break;
+	case MMC_BUS_WIDTH_4:
+		ctrl |= SDHCI_CTRL_4BITBUS;
+		ctrl &= ~SDHCI_CTRL_8BITBUS;
+		break;
+	default:
+		ctrl &= ~(SDHCI_CTRL_8BITBUS | SDHCI_CTRL_4BITBUS);
+		break;
+	}
+
+	sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
+
+	return 0;
+}
+
 static struct sdhci_ops sdhci_pci_ops = {
 	.enable_dma	= sdhci_pci_enable_dma,
+	.platform_8bit_width	= sdhci_pci_8bit_width,
 };
 
 /*****************************************************************************\
diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index dbab040..71c0ce1 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -2,6 +2,12 @@
  * sdhci-pltfm.c Support for SDHCI platform devices
  * Copyright (c) 2009 Intel Corporation
  *
+ * Copyright (c) 2007 Freescale Semiconductor, Inc.
+ * Copyright (c) 2009 MontaVista Software, Inc.
+ *
+ * Authors: Xiaobo Xie <X.Xie@freescale.com>
+ *	    Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -22,48 +28,66 @@
  * Inspired by sdhci-pci.c, by Pierre Ossman
  */
 
-#include <linux/delay.h>
-#include <linux/highmem.h>
-#include <linux/mod_devicetable.h>
-#include <linux/platform_device.h>
-
-#include <linux/mmc/host.h>
-
-#include <linux/io.h>
-#include <linux/mmc/sdhci-pltfm.h>
-
-#include "sdhci.h"
+#include <linux/err.h>
+#include <linux/of.h>
+#ifdef CONFIG_PPC
+#include <asm/machdep.h>
+#endif
 #include "sdhci-pltfm.h"
 
-/*****************************************************************************\
- *                                                                           *
- * SDHCI core callbacks                                                      *
- *                                                                           *
-\*****************************************************************************/
-
 static struct sdhci_ops sdhci_pltfm_ops = {
 };
 
-/*****************************************************************************\
- *                                                                           *
- * Device probing/removal                                                    *
- *                                                                           *
-\*****************************************************************************/
-
-static int __devinit sdhci_pltfm_probe(struct platform_device *pdev)
+#ifdef CONFIG_OF
+static bool sdhci_of_wp_inverted(struct device_node *np)
 {
-	const struct platform_device_id *platid = platform_get_device_id(pdev);
-	struct sdhci_pltfm_data *pdata;
+	if (of_get_property(np, "sdhci,wp-inverted", NULL))
+		return true;
+
+	/* Old device trees don't have the wp-inverted property. */
+#ifdef CONFIG_PPC
+	return machine_is(mpc837x_rdb) || machine_is(mpc837x_mds);
+#else
+	return false;
+#endif /* CONFIG_PPC */
+}
+
+void sdhci_get_of_property(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct sdhci_host *host = platform_get_drvdata(pdev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	const __be32 *clk;
+	int size;
+
+	if (of_device_is_available(np)) {
+		if (of_get_property(np, "sdhci,auto-cmd12", NULL))
+			host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12;
+
+		if (of_get_property(np, "sdhci,1-bit-only", NULL))
+			host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA;
+
+		if (sdhci_of_wp_inverted(np))
+			host->quirks |= SDHCI_QUIRK_INVERTED_WRITE_PROTECT;
+
+		clk = of_get_property(np, "clock-frequency", &size);
+		if (clk && size == sizeof(*clk) && *clk)
+			pltfm_host->clock = be32_to_cpup(clk);
+	}
+}
+#else
+void sdhci_get_of_property(struct platform_device *pdev) {}
+#endif /* CONFIG_OF */
+EXPORT_SYMBOL_GPL(sdhci_get_of_property);
+
+struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev,
+				    struct sdhci_pltfm_data *pdata)
+{
 	struct sdhci_host *host;
 	struct sdhci_pltfm_host *pltfm_host;
 	struct resource *iomem;
 	int ret;
 
-	if (platid && platid->driver_data)
-		pdata = (void *)platid->driver_data;
-	else
-		pdata = pdev->dev.platform_data;
-
 	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!iomem) {
 		ret = -ENOMEM;
@@ -71,8 +95,7 @@
 	}
 
 	if (resource_size(iomem) < 0x100)
-		dev_err(&pdev->dev, "Invalid iomem size. You may "
-			"experience problems.\n");
+		dev_err(&pdev->dev, "Invalid iomem size!\n");
 
 	/* Some PCI-based MFD need the parent here */
 	if (pdev->dev.parent != &platform_bus)
@@ -87,7 +110,7 @@
 
 	pltfm_host = sdhci_priv(host);
 
-	host->hw_name = "platform";
+	host->hw_name = dev_name(&pdev->dev);
 	if (pdata && pdata->ops)
 		host->ops = pdata->ops;
 	else
@@ -110,126 +133,95 @@
 		goto err_remap;
 	}
 
-	if (pdata && pdata->init) {
-		ret = pdata->init(host, pdata);
-		if (ret)
-			goto err_plat_init;
-	}
-
-	ret = sdhci_add_host(host);
-	if (ret)
-		goto err_add_host;
-
 	platform_set_drvdata(pdev, host);
 
-	return 0;
+	return host;
 
-err_add_host:
-	if (pdata && pdata->exit)
-		pdata->exit(host);
-err_plat_init:
-	iounmap(host->ioaddr);
 err_remap:
 	release_mem_region(iomem->start, resource_size(iomem));
 err_request:
 	sdhci_free_host(host);
 err:
-	printk(KERN_ERR"Probing of sdhci-pltfm failed: %d\n", ret);
-	return ret;
+	dev_err(&pdev->dev, "%s failed %d\n", __func__, ret);
+	return ERR_PTR(ret);
 }
+EXPORT_SYMBOL_GPL(sdhci_pltfm_init);
 
-static int __devexit sdhci_pltfm_remove(struct platform_device *pdev)
+void sdhci_pltfm_free(struct platform_device *pdev)
 {
-	struct sdhci_pltfm_data *pdata = pdev->dev.platform_data;
 	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	int dead;
-	u32 scratch;
 
-	dead = 0;
-	scratch = readl(host->ioaddr + SDHCI_INT_STATUS);
-	if (scratch == (u32)-1)
-		dead = 1;
-
-	sdhci_remove_host(host, dead);
-	if (pdata && pdata->exit)
-		pdata->exit(host);
 	iounmap(host->ioaddr);
 	release_mem_region(iomem->start, resource_size(iomem));
 	sdhci_free_host(host);
 	platform_set_drvdata(pdev, NULL);
+}
+EXPORT_SYMBOL_GPL(sdhci_pltfm_free);
+
+int sdhci_pltfm_register(struct platform_device *pdev,
+			 struct sdhci_pltfm_data *pdata)
+{
+	struct sdhci_host *host;
+	int ret = 0;
+
+	host = sdhci_pltfm_init(pdev, pdata);
+	if (IS_ERR(host))
+		return PTR_ERR(host);
+
+	sdhci_get_of_property(pdev);
+
+	ret = sdhci_add_host(host);
+	if (ret)
+		sdhci_pltfm_free(pdev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sdhci_pltfm_register);
+
+int sdhci_pltfm_unregister(struct platform_device *pdev)
+{
+	struct sdhci_host *host = platform_get_drvdata(pdev);
+	int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
+
+	sdhci_remove_host(host, dead);
+	sdhci_pltfm_free(pdev);
 
 	return 0;
 }
-
-static const struct platform_device_id sdhci_pltfm_ids[] = {
-	{ "sdhci", },
-#ifdef CONFIG_MMC_SDHCI_CNS3XXX
-	{ "sdhci-cns3xxx", (kernel_ulong_t)&sdhci_cns3xxx_pdata },
-#endif
-#ifdef CONFIG_MMC_SDHCI_ESDHC_IMX
-	{ "sdhci-esdhc-imx", (kernel_ulong_t)&sdhci_esdhc_imx_pdata },
-#endif
-#ifdef CONFIG_MMC_SDHCI_DOVE
-	{ "sdhci-dove", (kernel_ulong_t)&sdhci_dove_pdata },
-#endif
-#ifdef CONFIG_MMC_SDHCI_TEGRA
-	{ "sdhci-tegra", (kernel_ulong_t)&sdhci_tegra_pdata },
-#endif
-	{ },
-};
-MODULE_DEVICE_TABLE(platform, sdhci_pltfm_ids);
+EXPORT_SYMBOL_GPL(sdhci_pltfm_unregister);
 
 #ifdef CONFIG_PM
-static int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state)
+int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state)
 {
 	struct sdhci_host *host = platform_get_drvdata(dev);
 
 	return sdhci_suspend_host(host, state);
 }
+EXPORT_SYMBOL_GPL(sdhci_pltfm_suspend);
 
-static int sdhci_pltfm_resume(struct platform_device *dev)
+int sdhci_pltfm_resume(struct platform_device *dev)
 {
 	struct sdhci_host *host = platform_get_drvdata(dev);
 
 	return sdhci_resume_host(host);
 }
-#else
-#define sdhci_pltfm_suspend	NULL
-#define sdhci_pltfm_resume	NULL
+EXPORT_SYMBOL_GPL(sdhci_pltfm_resume);
 #endif	/* CONFIG_PM */
 
-static struct platform_driver sdhci_pltfm_driver = {
-	.driver = {
-		.name	= "sdhci",
-		.owner	= THIS_MODULE,
-	},
-	.probe		= sdhci_pltfm_probe,
-	.remove		= __devexit_p(sdhci_pltfm_remove),
-	.id_table	= sdhci_pltfm_ids,
-	.suspend	= sdhci_pltfm_suspend,
-	.resume		= sdhci_pltfm_resume,
-};
-
-/*****************************************************************************\
- *                                                                           *
- * Driver init/exit                                                          *
- *                                                                           *
-\*****************************************************************************/
-
-static int __init sdhci_drv_init(void)
+static int __init sdhci_pltfm_drv_init(void)
 {
-	return platform_driver_register(&sdhci_pltfm_driver);
-}
+	pr_info("sdhci-pltfm: SDHCI platform and OF driver helper\n");
 
-static void __exit sdhci_drv_exit(void)
+	return 0;
+}
+module_init(sdhci_pltfm_drv_init);
+
+static void __exit sdhci_pltfm_drv_exit(void)
 {
-	platform_driver_unregister(&sdhci_pltfm_driver);
 }
+module_exit(sdhci_pltfm_drv_exit);
 
-module_init(sdhci_drv_init);
-module_exit(sdhci_drv_exit);
-
-MODULE_DESCRIPTION("Secure Digital Host Controller Interface platform driver");
-MODULE_AUTHOR("Mocean Laboratories <info@mocean-labs.com>");
+MODULE_DESCRIPTION("SDHCI platform and OF driver helper");
+MODULE_AUTHOR("Intel Corporation");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/sdhci-pltfm.h b/drivers/mmc/host/sdhci-pltfm.h
index 2b37016..3a9fc3f 100644
--- a/drivers/mmc/host/sdhci-pltfm.h
+++ b/drivers/mmc/host/sdhci-pltfm.h
@@ -12,17 +12,95 @@
 #define _DRIVERS_MMC_SDHCI_PLTFM_H
 
 #include <linux/clk.h>
-#include <linux/types.h>
-#include <linux/mmc/sdhci-pltfm.h>
+#include <linux/platform_device.h>
+#include "sdhci.h"
+
+struct sdhci_pltfm_data {
+	struct sdhci_ops *ops;
+	unsigned int quirks;
+};
 
 struct sdhci_pltfm_host {
 	struct clk *clk;
 	void *priv; /* to handle quirks across io-accessor calls */
+
+	/* migrate from sdhci_of_host */
+	unsigned int clock;
+	u16 xfer_mode_shadow;
 };
 
-extern struct sdhci_pltfm_data sdhci_cns3xxx_pdata;
-extern struct sdhci_pltfm_data sdhci_esdhc_imx_pdata;
-extern struct sdhci_pltfm_data sdhci_dove_pdata;
-extern struct sdhci_pltfm_data sdhci_tegra_pdata;
+#ifdef CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER
+/*
+ * These accessors are designed for big endian hosts doing I/O to
+ * little endian controllers incorporating a 32-bit hardware byte swapper.
+ */
+static inline u32 sdhci_be32bs_readl(struct sdhci_host *host, int reg)
+{
+	return in_be32(host->ioaddr + reg);
+}
+
+static inline u16 sdhci_be32bs_readw(struct sdhci_host *host, int reg)
+{
+	return in_be16(host->ioaddr + (reg ^ 0x2));
+}
+
+static inline u8 sdhci_be32bs_readb(struct sdhci_host *host, int reg)
+{
+	return in_8(host->ioaddr + (reg ^ 0x3));
+}
+
+static inline void sdhci_be32bs_writel(struct sdhci_host *host,
+				       u32 val, int reg)
+{
+	out_be32(host->ioaddr + reg, val);
+}
+
+static inline void sdhci_be32bs_writew(struct sdhci_host *host,
+				       u16 val, int reg)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	int base = reg & ~0x3;
+	int shift = (reg & 0x2) * 8;
+
+	switch (reg) {
+	case SDHCI_TRANSFER_MODE:
+		/*
+		 * Postpone this write, we must do it together with a
+		 * command write that is down below.
+		 */
+		pltfm_host->xfer_mode_shadow = val;
+		return;
+	case SDHCI_COMMAND:
+		sdhci_be32bs_writel(host,
+				    val << 16 | pltfm_host->xfer_mode_shadow,
+				    SDHCI_TRANSFER_MODE);
+		return;
+	}
+	clrsetbits_be32(host->ioaddr + base, 0xffff << shift, val << shift);
+}
+
+static inline void sdhci_be32bs_writeb(struct sdhci_host *host, u8 val, int reg)
+{
+	int base = reg & ~0x3;
+	int shift = (reg & 0x3) * 8;
+
+	clrsetbits_be32(host->ioaddr + base , 0xff << shift, val << shift);
+}
+#endif /* CONFIG_MMC_SDHCI_BIG_ENDIAN_32BIT_BYTE_SWAPPER */
+
+extern void sdhci_get_of_property(struct platform_device *pdev);
+
+extern struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev,
+					   struct sdhci_pltfm_data *pdata);
+extern void sdhci_pltfm_free(struct platform_device *pdev);
+
+extern int sdhci_pltfm_register(struct platform_device *pdev,
+				struct sdhci_pltfm_data *pdata);
+extern int sdhci_pltfm_unregister(struct platform_device *pdev);
+
+#ifdef CONFIG_PM
+extern int sdhci_pltfm_suspend(struct platform_device *dev, pm_message_t state);
+extern int sdhci_pltfm_resume(struct platform_device *dev);
+#endif
 
 #endif /* _DRIVERS_MMC_SDHCI_PLTFM_H */
diff --git a/drivers/mmc/host/sdhci-pxa.c b/drivers/mmc/host/sdhci-pxa.c
deleted file mode 100644
index 089c9a6..0000000
--- a/drivers/mmc/host/sdhci-pxa.c
+++ /dev/null
@@ -1,303 +0,0 @@
-/* linux/drivers/mmc/host/sdhci-pxa.c
- *
- * Copyright (C) 2010 Marvell International Ltd.
- *		Zhangfei Gao <zhangfei.gao@marvell.com>
- *		Kevin Wang <dwang4@marvell.com>
- *		Mingwei Wang <mwwang@marvell.com>
- *		Philip Rakity <prakity@marvell.com>
- *		Mark Brown <markb@marvell.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/* Supports:
- * SDHCI support for MMP2/PXA910/PXA168
- *
- * Refer to sdhci-s3c.c.
- */
-
-#include <linux/delay.h>
-#include <linux/platform_device.h>
-#include <linux/mmc/host.h>
-#include <linux/clk.h>
-#include <linux/io.h>
-#include <linux/err.h>
-#include <plat/sdhci.h>
-#include "sdhci.h"
-
-#define DRIVER_NAME	"sdhci-pxa"
-
-#define SD_FIFO_PARAM		0x104
-#define DIS_PAD_SD_CLK_GATE	0x400
-
-struct sdhci_pxa {
-	struct sdhci_host		*host;
-	struct sdhci_pxa_platdata	*pdata;
-	struct clk			*clk;
-	struct resource			*res;
-
-	u8 clk_enable;
-};
-
-/*****************************************************************************\
- *                                                                           *
- * SDHCI core callbacks                                                      *
- *                                                                           *
-\*****************************************************************************/
-static void set_clock(struct sdhci_host *host, unsigned int clock)
-{
-	struct sdhci_pxa *pxa = sdhci_priv(host);
-	u32 tmp = 0;
-
-	if (clock == 0) {
-		if (pxa->clk_enable) {
-			clk_disable(pxa->clk);
-			pxa->clk_enable = 0;
-		}
-	} else {
-		if (0 == pxa->clk_enable) {
-			if (pxa->pdata->flags & PXA_FLAG_DISABLE_CLOCK_GATING) {
-				tmp = readl(host->ioaddr + SD_FIFO_PARAM);
-				tmp |= DIS_PAD_SD_CLK_GATE;
-				writel(tmp, host->ioaddr + SD_FIFO_PARAM);
-			}
-			clk_enable(pxa->clk);
-			pxa->clk_enable = 1;
-		}
-	}
-}
-
-static int set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
-{
-	u16 ctrl_2;
-
-	/*
-	 * Set V18_EN -- UHS modes do not work without this.
-	 * does not change signaling voltage
-	 */
-	ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-
-	/* Select Bus Speed Mode for host */
-	ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
-	switch (uhs) {
-	case MMC_TIMING_UHS_SDR12:
-		ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
-		break;
-	case MMC_TIMING_UHS_SDR25:
-		ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
-		break;
-	case MMC_TIMING_UHS_SDR50:
-		ctrl_2 |= SDHCI_CTRL_UHS_SDR50 | SDHCI_CTRL_VDD_180;
-		break;
-	case MMC_TIMING_UHS_SDR104:
-		ctrl_2 |= SDHCI_CTRL_UHS_SDR104 | SDHCI_CTRL_VDD_180;
-		break;
-	case MMC_TIMING_UHS_DDR50:
-		ctrl_2 |= SDHCI_CTRL_UHS_DDR50 | SDHCI_CTRL_VDD_180;
-		break;
-	}
-
-	sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
-	pr_debug("%s:%s uhs = %d, ctrl_2 = %04X\n",
-		__func__, mmc_hostname(host->mmc), uhs, ctrl_2);
-
-	return 0;
-}
-
-static struct sdhci_ops sdhci_pxa_ops = {
-	.set_uhs_signaling = set_uhs_signaling,
-	.set_clock = set_clock,
-};
-
-/*****************************************************************************\
- *                                                                           *
- * Device probing/removal                                                    *
- *                                                                           *
-\*****************************************************************************/
-
-static int __devinit sdhci_pxa_probe(struct platform_device *pdev)
-{
-	struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data;
-	struct device *dev = &pdev->dev;
-	struct sdhci_host *host = NULL;
-	struct resource *iomem = NULL;
-	struct sdhci_pxa *pxa = NULL;
-	int ret, irq;
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(dev, "no irq specified\n");
-		return irq;
-	}
-
-	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!iomem) {
-		dev_err(dev, "no memory specified\n");
-		return -ENOENT;
-	}
-
-	host = sdhci_alloc_host(&pdev->dev, sizeof(struct sdhci_pxa));
-	if (IS_ERR(host)) {
-		dev_err(dev, "failed to alloc host\n");
-		return PTR_ERR(host);
-	}
-
-	pxa = sdhci_priv(host);
-	pxa->host = host;
-	pxa->pdata = pdata;
-	pxa->clk_enable = 0;
-
-	pxa->clk = clk_get(dev, "PXA-SDHCLK");
-	if (IS_ERR(pxa->clk)) {
-		dev_err(dev, "failed to get io clock\n");
-		ret = PTR_ERR(pxa->clk);
-		goto out;
-	}
-
-	pxa->res = request_mem_region(iomem->start, resource_size(iomem),
-				      mmc_hostname(host->mmc));
-	if (!pxa->res) {
-		dev_err(&pdev->dev, "cannot request region\n");
-		ret = -EBUSY;
-		goto out;
-	}
-
-	host->ioaddr = ioremap(iomem->start, resource_size(iomem));
-	if (!host->ioaddr) {
-		dev_err(&pdev->dev, "failed to remap registers\n");
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	host->hw_name = "MMC";
-	host->ops = &sdhci_pxa_ops;
-	host->irq = irq;
-	host->quirks = SDHCI_QUIRK_BROKEN_ADMA
-		| SDHCI_QUIRK_BROKEN_TIMEOUT_VAL
-		| SDHCI_QUIRK_32BIT_DMA_ADDR
-		| SDHCI_QUIRK_32BIT_DMA_SIZE
-		| SDHCI_QUIRK_32BIT_ADMA_SIZE
-		| SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC;
-
-	if (pdata->quirks)
-		host->quirks |= pdata->quirks;
-
-	/* enable 1/8V DDR capable */
-	host->mmc->caps |= MMC_CAP_1_8V_DDR;
-
-	/* If slot design supports 8 bit data, indicate this to MMC. */
-	if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT)
-		host->mmc->caps |= MMC_CAP_8_BIT_DATA;
-
-	ret = sdhci_add_host(host);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to add host\n");
-		goto out;
-	}
-
-	if (pxa->pdata->max_speed)
-		host->mmc->f_max = pxa->pdata->max_speed;
-
-	platform_set_drvdata(pdev, host);
-
-	return 0;
-out:
-	if (host) {
-		clk_put(pxa->clk);
-		if (host->ioaddr)
-			iounmap(host->ioaddr);
-		if (pxa->res)
-			release_mem_region(pxa->res->start,
-					   resource_size(pxa->res));
-		sdhci_free_host(host);
-	}
-
-	return ret;
-}
-
-static int __devexit sdhci_pxa_remove(struct platform_device *pdev)
-{
-	struct sdhci_host *host = platform_get_drvdata(pdev);
-	struct sdhci_pxa *pxa = sdhci_priv(host);
-	int dead = 0;
-	u32 scratch;
-
-	if (host) {
-		scratch = readl(host->ioaddr + SDHCI_INT_STATUS);
-		if (scratch == (u32)-1)
-			dead = 1;
-
-		sdhci_remove_host(host, dead);
-
-		if (host->ioaddr)
-			iounmap(host->ioaddr);
-		if (pxa->res)
-			release_mem_region(pxa->res->start,
-					   resource_size(pxa->res));
-		if (pxa->clk_enable) {
-			clk_disable(pxa->clk);
-			pxa->clk_enable = 0;
-		}
-		clk_put(pxa->clk);
-
-		sdhci_free_host(host);
-		platform_set_drvdata(pdev, NULL);
-	}
-
-	return 0;
-}
-
-#ifdef CONFIG_PM
-static int sdhci_pxa_suspend(struct platform_device *dev, pm_message_t state)
-{
-	struct sdhci_host *host = platform_get_drvdata(dev);
-
-	return sdhci_suspend_host(host, state);
-}
-
-static int sdhci_pxa_resume(struct platform_device *dev)
-{
-	struct sdhci_host *host = platform_get_drvdata(dev);
-
-	return sdhci_resume_host(host);
-}
-#else
-#define sdhci_pxa_suspend	NULL
-#define sdhci_pxa_resume	NULL
-#endif
-
-static struct platform_driver sdhci_pxa_driver = {
-	.probe		= sdhci_pxa_probe,
-	.remove		= __devexit_p(sdhci_pxa_remove),
-	.suspend	= sdhci_pxa_suspend,
-	.resume		= sdhci_pxa_resume,
-	.driver		= {
-		.name	= DRIVER_NAME,
-		.owner	= THIS_MODULE,
-	},
-};
-
-/*****************************************************************************\
- *                                                                           *
- * Driver init/exit                                                          *
- *                                                                           *
-\*****************************************************************************/
-
-static int __init sdhci_pxa_init(void)
-{
-	return platform_driver_register(&sdhci_pxa_driver);
-}
-
-static void __exit sdhci_pxa_exit(void)
-{
-	platform_driver_unregister(&sdhci_pxa_driver);
-}
-
-module_init(sdhci_pxa_init);
-module_exit(sdhci_pxa_exit);
-
-MODULE_DESCRIPTION("SDH controller driver for PXA168/PXA910/MMP2");
-MODULE_AUTHOR("Zhangfei Gao <zhangfei.gao@marvell.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c
new file mode 100644
index 0000000..38f5899
--- /dev/null
+++ b/drivers/mmc/host/sdhci-pxav2.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (C) 2010 Marvell International Ltd.
+ *		Zhangfei Gao <zhangfei.gao@marvell.com>
+ *		Kevin Wang <dwang4@marvell.com>
+ *		Jun Nie <njun@marvell.com>
+ *		Qiming Wu <wuqm@marvell.com>
+ *		Philip Rakity <prakity@marvell.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+#include <linux/mmc/card.h>
+#include <linux/mmc/host.h>
+#include <linux/platform_data/pxa_sdhci.h>
+#include <linux/slab.h>
+#include "sdhci.h"
+#include "sdhci-pltfm.h"
+
+#define SD_FIFO_PARAM		0xe0
+#define DIS_PAD_SD_CLK_GATE	0x0400 /* Turn on/off Dynamic SD Clock Gating */
+#define CLK_GATE_ON		0x0200 /* Disable/enable Clock Gate */
+#define CLK_GATE_CTL		0x0100 /* Clock Gate Control */
+#define CLK_GATE_SETTING_BITS	(DIS_PAD_SD_CLK_GATE | \
+		CLK_GATE_ON | CLK_GATE_CTL)
+
+#define SD_CLOCK_BURST_SIZE_SETUP	0xe6
+#define SDCLK_SEL_SHIFT		8
+#define SDCLK_SEL_MASK		0x3
+#define SDCLK_DELAY_SHIFT	10
+#define SDCLK_DELAY_MASK	0x3c
+
+#define SD_CE_ATA_2		0xea
+#define MMC_CARD		0x1000
+#define MMC_WIDTH		0x0100
+
+static void pxav2_set_private_registers(struct sdhci_host *host, u8 mask)
+{
+	struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
+	struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data;
+
+	if (mask == SDHCI_RESET_ALL) {
+		u16 tmp = 0;
+
+		/*
+		 * tune timing of read data/command when crc error happen
+		 * no performance impact
+		 */
+		if (pdata->clk_delay_sel == 1) {
+			tmp = readw(host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP);
+
+			tmp &= ~(SDCLK_DELAY_MASK << SDCLK_DELAY_SHIFT);
+			tmp |= (pdata->clk_delay_cycles & SDCLK_DELAY_MASK)
+				<< SDCLK_DELAY_SHIFT;
+			tmp &= ~(SDCLK_SEL_MASK << SDCLK_SEL_SHIFT);
+			tmp |= (1 & SDCLK_SEL_MASK) << SDCLK_SEL_SHIFT;
+
+			writew(tmp, host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP);
+		}
+
+		if (pdata->flags & PXA_FLAG_ENABLE_CLOCK_GATING) {
+			tmp = readw(host->ioaddr + SD_FIFO_PARAM);
+			tmp &= ~CLK_GATE_SETTING_BITS;
+			writew(tmp, host->ioaddr + SD_FIFO_PARAM);
+		} else {
+			tmp = readw(host->ioaddr + SD_FIFO_PARAM);
+			tmp &= ~CLK_GATE_SETTING_BITS;
+			tmp |= CLK_GATE_SETTING_BITS;
+			writew(tmp, host->ioaddr + SD_FIFO_PARAM);
+		}
+	}
+}
+
+static int pxav2_mmc_set_width(struct sdhci_host *host, int width)
+{
+	u8 ctrl;
+	u16 tmp;
+
+	ctrl = readb(host->ioaddr + SDHCI_HOST_CONTROL);
+	tmp = readw(host->ioaddr + SD_CE_ATA_2);
+	if (width == MMC_BUS_WIDTH_8) {
+		ctrl &= ~SDHCI_CTRL_4BITBUS;
+		tmp |= MMC_CARD | MMC_WIDTH;
+	} else {
+		tmp &= ~(MMC_CARD | MMC_WIDTH);
+		if (width == MMC_BUS_WIDTH_4)
+			ctrl |= SDHCI_CTRL_4BITBUS;
+		else
+			ctrl &= ~SDHCI_CTRL_4BITBUS;
+	}
+	writew(tmp, host->ioaddr + SD_CE_ATA_2);
+	writeb(ctrl, host->ioaddr + SDHCI_HOST_CONTROL);
+
+	return 0;
+}
+
+static u32 pxav2_get_max_clock(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+	return clk_get_rate(pltfm_host->clk);
+}
+
+static struct sdhci_ops pxav2_sdhci_ops = {
+	.get_max_clock = pxav2_get_max_clock,
+	.platform_reset_exit = pxav2_set_private_registers,
+	.platform_8bit_width = pxav2_mmc_set_width,
+};
+
+static int __devinit sdhci_pxav2_probe(struct platform_device *pdev)
+{
+	struct sdhci_pltfm_host *pltfm_host;
+	struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data;
+	struct device *dev = &pdev->dev;
+	struct sdhci_host *host = NULL;
+	struct sdhci_pxa *pxa = NULL;
+	int ret;
+	struct clk *clk;
+
+	pxa = kzalloc(sizeof(struct sdhci_pxa), GFP_KERNEL);
+	if (!pxa)
+		return -ENOMEM;
+
+	host = sdhci_pltfm_init(pdev, NULL);
+	if (IS_ERR(host)) {
+		kfree(pxa);
+		return PTR_ERR(host);
+	}
+	pltfm_host = sdhci_priv(host);
+	pltfm_host->priv = pxa;
+
+	clk = clk_get(dev, "PXA-SDHCLK");
+	if (IS_ERR(clk)) {
+		dev_err(dev, "failed to get io clock\n");
+		ret = PTR_ERR(clk);
+		goto err_clk_get;
+	}
+	pltfm_host->clk = clk;
+	clk_enable(clk);
+
+	host->quirks = SDHCI_QUIRK_BROKEN_ADMA
+		| SDHCI_QUIRK_BROKEN_TIMEOUT_VAL
+		| SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN;
+
+	if (pdata) {
+		if (pdata->flags & PXA_FLAG_CARD_PERMANENT) {
+			/* on-chip device */
+			host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION;
+			host->mmc->caps |= MMC_CAP_NONREMOVABLE;
+		}
+
+		/* If slot design supports 8 bit data, indicate this to MMC. */
+		if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT)
+			host->mmc->caps |= MMC_CAP_8_BIT_DATA;
+
+		if (pdata->quirks)
+			host->quirks |= pdata->quirks;
+		if (pdata->host_caps)
+			host->mmc->caps |= pdata->host_caps;
+		if (pdata->pm_caps)
+			host->mmc->pm_caps |= pdata->pm_caps;
+	}
+
+	host->ops = &pxav2_sdhci_ops;
+
+	ret = sdhci_add_host(host);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to add host\n");
+		goto err_add_host;
+	}
+
+	platform_set_drvdata(pdev, host);
+
+	return 0;
+
+err_add_host:
+	clk_disable(clk);
+	clk_put(clk);
+err_clk_get:
+	sdhci_pltfm_free(pdev);
+	kfree(pxa);
+	return ret;
+}
+
+static int __devexit sdhci_pxav2_remove(struct platform_device *pdev)
+{
+	struct sdhci_host *host = platform_get_drvdata(pdev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_pxa *pxa = pltfm_host->priv;
+
+	sdhci_remove_host(host, 1);
+
+	clk_disable(pltfm_host->clk);
+	clk_put(pltfm_host->clk);
+	sdhci_pltfm_free(pdev);
+	kfree(pxa);
+
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver sdhci_pxav2_driver = {
+	.driver		= {
+		.name	= "sdhci-pxav2",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= sdhci_pxav2_probe,
+	.remove		= __devexit_p(sdhci_pxav2_remove),
+#ifdef CONFIG_PM
+	.suspend	= sdhci_pltfm_suspend,
+	.resume		= sdhci_pltfm_resume,
+#endif
+};
+static int __init sdhci_pxav2_init(void)
+{
+	return platform_driver_register(&sdhci_pxav2_driver);
+}
+
+static void __exit sdhci_pxav2_exit(void)
+{
+	platform_driver_unregister(&sdhci_pxav2_driver);
+}
+
+module_init(sdhci_pxav2_init);
+module_exit(sdhci_pxav2_exit);
+
+MODULE_DESCRIPTION("SDHCI driver for pxav2");
+MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
new file mode 100644
index 0000000..4198dbb
--- /dev/null
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2010 Marvell International Ltd.
+ *		Zhangfei Gao <zhangfei.gao@marvell.com>
+ *		Kevin Wang <dwang4@marvell.com>
+ *		Mingwei Wang <mwwang@marvell.com>
+ *		Philip Rakity <prakity@marvell.com>
+ *		Mark Brown <markb@marvell.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/gpio.h>
+#include <linux/mmc/card.h>
+#include <linux/mmc/host.h>
+#include <linux/platform_data/pxa_sdhci.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include "sdhci.h"
+#include "sdhci-pltfm.h"
+
+#define SD_CLOCK_BURST_SIZE_SETUP		0x10A
+#define SDCLK_SEL	0x100
+#define SDCLK_DELAY_SHIFT	9
+#define SDCLK_DELAY_MASK	0x1f
+
+#define SD_CFG_FIFO_PARAM       0x100
+#define SDCFG_GEN_PAD_CLK_ON	(1<<6)
+#define SDCFG_GEN_PAD_CLK_CNT_MASK	0xFF
+#define SDCFG_GEN_PAD_CLK_CNT_SHIFT	24
+
+#define SD_SPI_MODE          0x108
+#define SD_CE_ATA_1          0x10C
+
+#define SD_CE_ATA_2          0x10E
+#define SDCE_MISC_INT		(1<<2)
+#define SDCE_MISC_INT_EN	(1<<1)
+
+static void pxav3_set_private_registers(struct sdhci_host *host, u8 mask)
+{
+	struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
+	struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data;
+
+	if (mask == SDHCI_RESET_ALL) {
+		/*
+		 * tune timing of read data/command when crc error happen
+		 * no performance impact
+		 */
+		if (pdata && 0 != pdata->clk_delay_cycles) {
+			u16 tmp;
+
+			tmp = readw(host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP);
+			tmp |= (pdata->clk_delay_cycles & SDCLK_DELAY_MASK)
+				<< SDCLK_DELAY_SHIFT;
+			tmp |= SDCLK_SEL;
+			writew(tmp, host->ioaddr + SD_CLOCK_BURST_SIZE_SETUP);
+		}
+	}
+}
+
+#define MAX_WAIT_COUNT 5
+static void pxav3_gen_init_74_clocks(struct sdhci_host *host, u8 power_mode)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_pxa *pxa = pltfm_host->priv;
+	u16 tmp;
+	int count;
+
+	if (pxa->power_mode == MMC_POWER_UP
+			&& power_mode == MMC_POWER_ON) {
+
+		dev_dbg(mmc_dev(host->mmc),
+				"%s: slot->power_mode = %d,"
+				"ios->power_mode = %d\n",
+				__func__,
+				pxa->power_mode,
+				power_mode);
+
+		/* set we want notice of when 74 clocks are sent */
+		tmp = readw(host->ioaddr + SD_CE_ATA_2);
+		tmp |= SDCE_MISC_INT_EN;
+		writew(tmp, host->ioaddr + SD_CE_ATA_2);
+
+		/* start sending the 74 clocks */
+		tmp = readw(host->ioaddr + SD_CFG_FIFO_PARAM);
+		tmp |= SDCFG_GEN_PAD_CLK_ON;
+		writew(tmp, host->ioaddr + SD_CFG_FIFO_PARAM);
+
+		/* slowest speed is about 100KHz or 10usec per clock */
+		udelay(740);
+		count = 0;
+
+		while (count++ < MAX_WAIT_COUNT) {
+			if ((readw(host->ioaddr + SD_CE_ATA_2)
+						& SDCE_MISC_INT) == 0)
+				break;
+			udelay(10);
+		}
+
+		if (count == MAX_WAIT_COUNT)
+			dev_warn(mmc_dev(host->mmc), "74 clock interrupt not cleared\n");
+
+		/* clear the interrupt bit if posted */
+		tmp = readw(host->ioaddr + SD_CE_ATA_2);
+		tmp |= SDCE_MISC_INT;
+		writew(tmp, host->ioaddr + SD_CE_ATA_2);
+	}
+	pxa->power_mode = power_mode;
+}
+
+static int pxav3_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
+{
+	u16 ctrl_2;
+
+	/*
+	 * Set V18_EN -- UHS modes do not work without this.
+	 * does not change signaling voltage
+	 */
+	ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+
+	/* Select Bus Speed Mode for host */
+	ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
+	switch (uhs) {
+	case MMC_TIMING_UHS_SDR12:
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
+		break;
+	case MMC_TIMING_UHS_SDR25:
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
+		break;
+	case MMC_TIMING_UHS_SDR50:
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR50 | SDHCI_CTRL_VDD_180;
+		break;
+	case MMC_TIMING_UHS_SDR104:
+		ctrl_2 |= SDHCI_CTRL_UHS_SDR104 | SDHCI_CTRL_VDD_180;
+		break;
+	case MMC_TIMING_UHS_DDR50:
+		ctrl_2 |= SDHCI_CTRL_UHS_DDR50 | SDHCI_CTRL_VDD_180;
+		break;
+	}
+
+	sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
+	dev_dbg(mmc_dev(host->mmc),
+		"%s uhs = %d, ctrl_2 = %04X\n",
+		__func__, uhs, ctrl_2);
+
+	return 0;
+}
+
+static struct sdhci_ops pxav3_sdhci_ops = {
+	.platform_reset_exit = pxav3_set_private_registers,
+	.set_uhs_signaling = pxav3_set_uhs_signaling,
+	.platform_send_init_74_clocks = pxav3_gen_init_74_clocks,
+};
+
+static int __devinit sdhci_pxav3_probe(struct platform_device *pdev)
+{
+	struct sdhci_pltfm_host *pltfm_host;
+	struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data;
+	struct device *dev = &pdev->dev;
+	struct sdhci_host *host = NULL;
+	struct sdhci_pxa *pxa = NULL;
+	int ret;
+	struct clk *clk;
+
+	pxa = kzalloc(sizeof(struct sdhci_pxa), GFP_KERNEL);
+	if (!pxa)
+		return -ENOMEM;
+
+	host = sdhci_pltfm_init(pdev, NULL);
+	if (IS_ERR(host)) {
+		kfree(pxa);
+		return PTR_ERR(host);
+	}
+	pltfm_host = sdhci_priv(host);
+	pltfm_host->priv = pxa;
+
+	clk = clk_get(dev, "PXA-SDHCLK");
+	if (IS_ERR(clk)) {
+		dev_err(dev, "failed to get io clock\n");
+		ret = PTR_ERR(clk);
+		goto err_clk_get;
+	}
+	pltfm_host->clk = clk;
+	clk_enable(clk);
+
+	host->quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL
+		| SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC;
+
+	/* enable 1/8V DDR capable */
+	host->mmc->caps |= MMC_CAP_1_8V_DDR;
+
+	if (pdata) {
+		if (pdata->flags & PXA_FLAG_CARD_PERMANENT) {
+			/* on-chip device */
+			host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION;
+			host->mmc->caps |= MMC_CAP_NONREMOVABLE;
+		}
+
+		/* If slot design supports 8 bit data, indicate this to MMC. */
+		if (pdata->flags & PXA_FLAG_SD_8_BIT_CAPABLE_SLOT)
+			host->mmc->caps |= MMC_CAP_8_BIT_DATA;
+
+		if (pdata->quirks)
+			host->quirks |= pdata->quirks;
+		if (pdata->host_caps)
+			host->mmc->caps |= pdata->host_caps;
+		if (pdata->pm_caps)
+			host->mmc->pm_caps |= pdata->pm_caps;
+	}
+
+	host->ops = &pxav3_sdhci_ops;
+
+	ret = sdhci_add_host(host);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to add host\n");
+		goto err_add_host;
+	}
+
+	platform_set_drvdata(pdev, host);
+
+	return 0;
+
+err_add_host:
+	clk_disable(clk);
+	clk_put(clk);
+err_clk_get:
+	sdhci_pltfm_free(pdev);
+	kfree(pxa);
+	return ret;
+}
+
+static int __devexit sdhci_pxav3_remove(struct platform_device *pdev)
+{
+	struct sdhci_host *host = platform_get_drvdata(pdev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_pxa *pxa = pltfm_host->priv;
+
+	sdhci_remove_host(host, 1);
+
+	clk_disable(pltfm_host->clk);
+	clk_put(pltfm_host->clk);
+	sdhci_pltfm_free(pdev);
+	kfree(pxa);
+
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver sdhci_pxav3_driver = {
+	.driver		= {
+		.name	= "sdhci-pxav3",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= sdhci_pxav3_probe,
+	.remove		= __devexit_p(sdhci_pxav3_remove),
+#ifdef CONFIG_PM
+	.suspend	= sdhci_pltfm_suspend,
+	.resume		= sdhci_pltfm_resume,
+#endif
+};
+static int __init sdhci_pxav3_init(void)
+{
+	return platform_driver_register(&sdhci_pxav3_driver);
+}
+
+static void __exit sdhci_pxav3_exit(void)
+{
+	platform_driver_unregister(&sdhci_pxav3_driver);
+}
+
+module_init(sdhci_pxav3_init);
+module_exit(sdhci_pxav3_exit);
+
+MODULE_DESCRIPTION("SDHCI driver for pxav3");
+MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index 69e3ee3..460ffaf 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -612,16 +612,14 @@
 {
 	struct sdhci_host *host = platform_get_drvdata(dev);
 
-	sdhci_suspend_host(host, pm);
-	return 0;
+	return sdhci_suspend_host(host, pm);
 }
 
 static int sdhci_s3c_resume(struct platform_device *dev)
 {
 	struct sdhci_host *host = platform_get_drvdata(dev);
 
-	sdhci_resume_host(host);
-	return 0;
+	return sdhci_resume_host(host);
 }
 
 #else
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 343c97e..18b0bd3 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -24,7 +24,6 @@
 #include <mach/gpio.h>
 #include <mach/sdhci.h>
 
-#include "sdhci.h"
 #include "sdhci-pltfm.h"
 
 static u32 tegra_sdhci_readl(struct sdhci_host *host, int reg)
@@ -116,20 +115,42 @@
 	return 0;
 }
 
+static struct sdhci_ops tegra_sdhci_ops = {
+	.get_ro     = tegra_sdhci_get_ro,
+	.read_l     = tegra_sdhci_readl,
+	.read_w     = tegra_sdhci_readw,
+	.write_l    = tegra_sdhci_writel,
+	.platform_8bit_width = tegra_sdhci_8bit,
+};
 
-static int tegra_sdhci_pltfm_init(struct sdhci_host *host,
-				  struct sdhci_pltfm_data *pdata)
+static struct sdhci_pltfm_data sdhci_tegra_pdata = {
+	.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
+		  SDHCI_QUIRK_SINGLE_POWER_WRITE |
+		  SDHCI_QUIRK_NO_HISPD_BIT |
+		  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC,
+	.ops  = &tegra_sdhci_ops,
+};
+
+static int __devinit sdhci_tegra_probe(struct platform_device *pdev)
 {
-	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
+	struct sdhci_pltfm_host *pltfm_host;
 	struct tegra_sdhci_platform_data *plat;
+	struct sdhci_host *host;
 	struct clk *clk;
 	int rc;
 
+	host = sdhci_pltfm_init(pdev, &sdhci_tegra_pdata);
+	if (IS_ERR(host))
+		return PTR_ERR(host);
+
+	pltfm_host = sdhci_priv(host);
+
 	plat = pdev->dev.platform_data;
+
 	if (plat == NULL) {
 		dev_err(mmc_dev(host->mmc), "missing platform data\n");
-		return -ENXIO;
+		rc = -ENXIO;
+		goto err_no_plat;
 	}
 
 	if (gpio_is_valid(plat->power_gpio)) {
@@ -137,7 +158,7 @@
 		if (rc) {
 			dev_err(mmc_dev(host->mmc),
 				"failed to allocate power gpio\n");
-			goto out;
+			goto err_power_req;
 		}
 		tegra_gpio_enable(plat->power_gpio);
 		gpio_direction_output(plat->power_gpio, 1);
@@ -148,7 +169,7 @@
 		if (rc) {
 			dev_err(mmc_dev(host->mmc),
 				"failed to allocate cd gpio\n");
-			goto out_power;
+			goto err_cd_req;
 		}
 		tegra_gpio_enable(plat->cd_gpio);
 		gpio_direction_input(plat->cd_gpio);
@@ -159,7 +180,7 @@
 
 		if (rc)	{
 			dev_err(mmc_dev(host->mmc), "request irq error\n");
-			goto out_cd;
+			goto err_cd_irq_req;
 		}
 
 	}
@@ -169,7 +190,7 @@
 		if (rc) {
 			dev_err(mmc_dev(host->mmc),
 				"failed to allocate wp gpio\n");
-			goto out_irq;
+			goto err_wp_req;
 		}
 		tegra_gpio_enable(plat->wp_gpio);
 		gpio_direction_input(plat->wp_gpio);
@@ -179,7 +200,7 @@
 	if (IS_ERR(clk)) {
 		dev_err(mmc_dev(host->mmc), "clk err\n");
 		rc = PTR_ERR(clk);
-		goto out_wp;
+		goto err_clk_get;
 	}
 	clk_enable(clk);
 	pltfm_host->clk = clk;
@@ -189,38 +210,47 @@
 	if (plat->is_8bit)
 		host->mmc->caps |= MMC_CAP_8_BIT_DATA;
 
+	rc = sdhci_add_host(host);
+	if (rc)
+		goto err_add_host;
+
 	return 0;
 
-out_wp:
+err_add_host:
+	clk_disable(pltfm_host->clk);
+	clk_put(pltfm_host->clk);
+err_clk_get:
 	if (gpio_is_valid(plat->wp_gpio)) {
 		tegra_gpio_disable(plat->wp_gpio);
 		gpio_free(plat->wp_gpio);
 	}
-
-out_irq:
+err_wp_req:
 	if (gpio_is_valid(plat->cd_gpio))
 		free_irq(gpio_to_irq(plat->cd_gpio), host);
-out_cd:
+err_cd_irq_req:
 	if (gpio_is_valid(plat->cd_gpio)) {
 		tegra_gpio_disable(plat->cd_gpio);
 		gpio_free(plat->cd_gpio);
 	}
-
-out_power:
+err_cd_req:
 	if (gpio_is_valid(plat->power_gpio)) {
 		tegra_gpio_disable(plat->power_gpio);
 		gpio_free(plat->power_gpio);
 	}
-
-out:
+err_power_req:
+err_no_plat:
+	sdhci_pltfm_free(pdev);
 	return rc;
 }
 
-static void tegra_sdhci_pltfm_exit(struct sdhci_host *host)
+static int __devexit sdhci_tegra_remove(struct platform_device *pdev)
 {
+	struct sdhci_host *host = platform_get_drvdata(pdev);
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
-	struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
 	struct tegra_sdhci_platform_data *plat;
+	int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
+
+	sdhci_remove_host(host, dead);
 
 	plat = pdev->dev.platform_data;
 
@@ -242,22 +272,37 @@
 
 	clk_disable(pltfm_host->clk);
 	clk_put(pltfm_host->clk);
+
+	sdhci_pltfm_free(pdev);
+
+	return 0;
 }
 
-static struct sdhci_ops tegra_sdhci_ops = {
-	.get_ro     = tegra_sdhci_get_ro,
-	.read_l     = tegra_sdhci_readl,
-	.read_w     = tegra_sdhci_readw,
-	.write_l    = tegra_sdhci_writel,
-	.platform_8bit_width = tegra_sdhci_8bit,
+static struct platform_driver sdhci_tegra_driver = {
+	.driver		= {
+		.name	= "sdhci-tegra",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= sdhci_tegra_probe,
+	.remove		= __devexit_p(sdhci_tegra_remove),
+#ifdef CONFIG_PM
+	.suspend	= sdhci_pltfm_suspend,
+	.resume		= sdhci_pltfm_resume,
+#endif
 };
 
-struct sdhci_pltfm_data sdhci_tegra_pdata = {
-	.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
-		  SDHCI_QUIRK_SINGLE_POWER_WRITE |
-		  SDHCI_QUIRK_NO_HISPD_BIT |
-		  SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC,
-	.ops  = &tegra_sdhci_ops,
-	.init = tegra_sdhci_pltfm_init,
-	.exit = tegra_sdhci_pltfm_exit,
-};
+static int __init sdhci_tegra_init(void)
+{
+	return platform_driver_register(&sdhci_tegra_driver);
+}
+module_init(sdhci_tegra_init);
+
+static void __exit sdhci_tegra_exit(void)
+{
+	platform_driver_unregister(&sdhci_tegra_driver);
+}
+module_exit(sdhci_tegra_exit);
+
+MODULE_DESCRIPTION("SDHCI driver for Tegra");
+MODULE_AUTHOR(" Google, Inc.");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 58d5436..c31a334 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -127,11 +127,15 @@
 
 static void sdhci_set_card_detection(struct sdhci_host *host, bool enable)
 {
-	u32 irqs = SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT;
+	u32 present, irqs;
 
 	if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION)
 		return;
 
+	present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
+			      SDHCI_CARD_PRESENT;
+	irqs = present ? SDHCI_INT_CARD_REMOVE : SDHCI_INT_CARD_INSERT;
+
 	if (enable)
 		sdhci_unmask_irqs(host, irqs);
 	else
@@ -2154,13 +2158,30 @@
 		mmc_hostname(host->mmc), intmask);
 
 	if (intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) {
+		u32 present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
+			      SDHCI_CARD_PRESENT;
+
+		/*
+		 * There is a observation on i.mx esdhc.  INSERT bit will be
+		 * immediately set again when it gets cleared, if a card is
+		 * inserted.  We have to mask the irq to prevent interrupt
+		 * storm which will freeze the system.  And the REMOVE gets
+		 * the same situation.
+		 *
+		 * More testing are needed here to ensure it works for other
+		 * platforms though.
+		 */
+		sdhci_mask_irqs(host, present ? SDHCI_INT_CARD_INSERT :
+						SDHCI_INT_CARD_REMOVE);
+		sdhci_unmask_irqs(host, present ? SDHCI_INT_CARD_REMOVE :
+						  SDHCI_INT_CARD_INSERT);
+
 		sdhci_writel(host, intmask & (SDHCI_INT_CARD_INSERT |
-			SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS);
+			     SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS);
+		intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE);
 		tasklet_schedule(&host->card_tasklet);
 	}
 
-	intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE);
-
 	if (intmask & SDHCI_INT_CMD_MASK) {
 		sdhci_writel(host, intmask & SDHCI_INT_CMD_MASK,
 			SDHCI_INT_STATUS);
@@ -2488,6 +2509,11 @@
 	} else
 		mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200;
 
+	if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK)
+		mmc->max_discard_to = (1 << 27) / (mmc->f_max / 1000);
+	else
+		mmc->max_discard_to = (1 << 27) / host->timeout_clk;
+
 	mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23;
 
 	if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12)
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 14f8edb..557886b 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -175,6 +175,7 @@
 	enum mmcif_state state;
 	spinlock_t lock;
 	bool power;
+	bool card_present;
 
 	/* DMA support */
 	struct dma_chan		*chan_rx;
@@ -877,23 +878,23 @@
 	spin_unlock_irqrestore(&host->lock, flags);
 
 	if (ios->power_mode == MMC_POWER_UP) {
-		if (p->set_pwr)
-			p->set_pwr(host->pd, ios->power_mode);
-		if (!host->power) {
+		if (!host->card_present) {
 			/* See if we also get DMA */
 			sh_mmcif_request_dma(host, host->pd->dev.platform_data);
-			pm_runtime_get_sync(&host->pd->dev);
-			host->power = true;
+			host->card_present = true;
 		}
 	} else if (ios->power_mode == MMC_POWER_OFF || !ios->clock) {
 		/* clock stop */
 		sh_mmcif_clock_control(host, 0);
 		if (ios->power_mode == MMC_POWER_OFF) {
-			if (host->power) {
-				pm_runtime_put(&host->pd->dev);
+			if (host->card_present) {
 				sh_mmcif_release_dma(host);
-				host->power = false;
+				host->card_present = false;
 			}
+		}
+		if (host->power) {
+			pm_runtime_put(&host->pd->dev);
+			host->power = false;
 			if (p->down_pwr)
 				p->down_pwr(host->pd);
 		}
@@ -901,8 +902,16 @@
 		return;
 	}
 
-	if (ios->clock)
+	if (ios->clock) {
+		if (!host->power) {
+			if (p->set_pwr)
+				p->set_pwr(host->pd, ios->power_mode);
+			pm_runtime_get_sync(&host->pd->dev);
+			host->power = true;
+			sh_mmcif_sync_reset(host);
+		}
 		sh_mmcif_clock_control(host, ios->clock);
+	}
 
 	host->bus_width = ios->bus_width;
 	host->state = STATE_IDLE;
diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c
index ce500f0..774f643 100644
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -26,6 +26,7 @@
 #include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/mfd/tmio.h>
 #include <linux/sh_dma.h>
+#include <linux/delay.h>
 
 #include "tmio_mmc.h"
 
@@ -55,6 +56,39 @@
 		return -ENOSYS;
 }
 
+static int sh_mobile_sdhi_wait_idle(struct tmio_mmc_host *host)
+{
+	int timeout = 1000;
+
+	while (--timeout && !(sd_ctrl_read16(host, CTL_STATUS2) & (1 << 13)))
+		udelay(1);
+
+	if (!timeout) {
+		dev_warn(host->pdata->dev, "timeout waiting for SD bus idle\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int sh_mobile_sdhi_write16_hook(struct tmio_mmc_host *host, int addr)
+{
+	switch (addr)
+	{
+	case CTL_SD_CMD:
+	case CTL_STOP_INTERNAL_ACTION:
+	case CTL_XFER_BLK_COUNT:
+	case CTL_SD_CARD_CLK_CTL:
+	case CTL_SD_XFER_LEN:
+	case CTL_SD_MEM_CARD_OPT:
+	case CTL_TRANSACTION_CTL:
+	case CTL_DMA_ENABLE:
+		return sh_mobile_sdhi_wait_idle(host);
+	}
+
+	return 0;
+}
+
 static int __devinit sh_mobile_sdhi_probe(struct platform_device *pdev)
 {
 	struct sh_mobile_sdhi *priv;
@@ -86,6 +120,8 @@
 	mmc_data->hclk = clk_get_rate(priv->clk);
 	mmc_data->set_pwr = sh_mobile_sdhi_set_pwr;
 	mmc_data->get_cd = sh_mobile_sdhi_get_cd;
+	if (mmc_data->flags & TMIO_MMC_HAS_IDLE_WAIT)
+		mmc_data->write16_hook = sh_mobile_sdhi_write16_hook;
 	mmc_data->capabilities = MMC_CAP_MMC_HIGHSPEED;
 	if (p) {
 		mmc_data->flags = p->tmio_flags;
diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index 8260bc2..087d880 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -18,6 +18,7 @@
 
 #include <linux/highmem.h>
 #include <linux/mmc/tmio.h>
+#include <linux/mutex.h>
 #include <linux/pagemap.h>
 #include <linux/spinlock.h>
 
@@ -52,6 +53,8 @@
 	void (*set_clk_div)(struct platform_device *host, int state);
 
 	int			pm_error;
+	/* recognise system-wide suspend in runtime PM methods */
+	bool			pm_global;
 
 	/* pio related stuff */
 	struct scatterlist      *sg_ptr;
@@ -73,8 +76,11 @@
 
 	/* Track lost interrupts */
 	struct delayed_work	delayed_reset_work;
-	spinlock_t		lock;
+	struct work_struct	done;
+
+	spinlock_t		lock;		/* protect host private data */
 	unsigned long		last_req_ts;
+	struct mutex		ios_lock;	/* protect set_ios() context */
 };
 
 int tmio_mmc_host_probe(struct tmio_mmc_host **host,
@@ -103,6 +109,7 @@
 
 #if defined(CONFIG_MMC_SDHI) || defined(CONFIG_MMC_SDHI_MODULE)
 void tmio_mmc_start_dma(struct tmio_mmc_host *host, struct mmc_data *data);
+void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable);
 void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdata);
 void tmio_mmc_release_dma(struct tmio_mmc_host *host);
 #else
@@ -111,6 +118,10 @@
 {
 }
 
+static inline void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable)
+{
+}
+
 static inline void tmio_mmc_request_dma(struct tmio_mmc_host *host,
 				 struct tmio_mmc_data *pdata)
 {
@@ -134,4 +145,44 @@
 int tmio_mmc_host_runtime_suspend(struct device *dev);
 int tmio_mmc_host_runtime_resume(struct device *dev);
 
+static inline u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr)
+{
+	return readw(host->ctl + (addr << host->bus_shift));
+}
+
+static inline void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr,
+		u16 *buf, int count)
+{
+	readsw(host->ctl + (addr << host->bus_shift), buf, count);
+}
+
+static inline u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr)
+{
+	return readw(host->ctl + (addr << host->bus_shift)) |
+	       readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16;
+}
+
+static inline void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, u16 val)
+{
+	/* If there is a hook and it returns non-zero then there
+	 * is an error and the write should be skipped
+	 */
+	if (host->pdata->write16_hook && host->pdata->write16_hook(host, addr))
+		return;
+	writew(val, host->ctl + (addr << host->bus_shift));
+}
+
+static inline void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr,
+		u16 *buf, int count)
+{
+	writesw(host->ctl + (addr << host->bus_shift), buf, count);
+}
+
+static inline void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, u32 val)
+{
+	writew(val, host->ctl + (addr << host->bus_shift));
+	writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
+}
+
+
 #endif
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index 25f1ad6..2aa616d 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -22,11 +22,14 @@
 
 #define TMIO_MMC_MIN_DMA_LEN 8
 
-static void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable)
+void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable)
 {
+	if (!host->chan_tx || !host->chan_rx)
+		return;
+
 #if defined(CONFIG_SUPERH) || defined(CONFIG_ARCH_SHMOBILE)
 	/* Switch DMA mode on or off - SuperH specific? */
-	writew(enable ? 2 : 0, host->ctl + (0xd8 << host->bus_shift));
+	sd_ctrl_write16(host, CTL_DMA_ENABLE, enable ? 2 : 0);
 #endif
 }
 
diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c
index 0b09e82..1f16357 100644
--- a/drivers/mmc/host/tmio_mmc_pio.c
+++ b/drivers/mmc/host/tmio_mmc_pio.c
@@ -46,40 +46,6 @@
 
 #include "tmio_mmc.h"
 
-static u16 sd_ctrl_read16(struct tmio_mmc_host *host, int addr)
-{
-	return readw(host->ctl + (addr << host->bus_shift));
-}
-
-static void sd_ctrl_read16_rep(struct tmio_mmc_host *host, int addr,
-		u16 *buf, int count)
-{
-	readsw(host->ctl + (addr << host->bus_shift), buf, count);
-}
-
-static u32 sd_ctrl_read32(struct tmio_mmc_host *host, int addr)
-{
-	return readw(host->ctl + (addr << host->bus_shift)) |
-	       readw(host->ctl + ((addr + 2) << host->bus_shift)) << 16;
-}
-
-static void sd_ctrl_write16(struct tmio_mmc_host *host, int addr, u16 val)
-{
-	writew(val, host->ctl + (addr << host->bus_shift));
-}
-
-static void sd_ctrl_write16_rep(struct tmio_mmc_host *host, int addr,
-		u16 *buf, int count)
-{
-	writesw(host->ctl + (addr << host->bus_shift), buf, count);
-}
-
-static void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, u32 val)
-{
-	writew(val, host->ctl + (addr << host->bus_shift));
-	writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift));
-}
-
 void tmio_mmc_enable_mmc_irqs(struct tmio_mmc_host *host, u32 i)
 {
 	u32 mask = sd_ctrl_read32(host, CTL_IRQ_MASK) & ~(i & TMIO_MASK_IRQ);
@@ -284,10 +250,16 @@
 /* called with host->lock held, interrupts disabled */
 static void tmio_mmc_finish_request(struct tmio_mmc_host *host)
 {
-	struct mmc_request *mrq = host->mrq;
+	struct mmc_request *mrq;
+	unsigned long flags;
 
-	if (!mrq)
+	spin_lock_irqsave(&host->lock, flags);
+
+	mrq = host->mrq;
+	if (IS_ERR_OR_NULL(mrq)) {
+		spin_unlock_irqrestore(&host->lock, flags);
 		return;
+	}
 
 	host->cmd = NULL;
 	host->data = NULL;
@@ -296,11 +268,18 @@
 	cancel_delayed_work(&host->delayed_reset_work);
 
 	host->mrq = NULL;
+	spin_unlock_irqrestore(&host->lock, flags);
 
-	/* FIXME: mmc_request_done() can schedule! */
 	mmc_request_done(host->mmc, mrq);
 }
 
+static void tmio_mmc_done_work(struct work_struct *work)
+{
+	struct tmio_mmc_host *host = container_of(work, struct tmio_mmc_host,
+						  done);
+	tmio_mmc_finish_request(host);
+}
+
 /* These are the bitmasks the tmio chip requires to implement the MMC response
  * types. Note that R1 and R6 are the same in this scheme. */
 #define APP_CMD        0x0040
@@ -467,7 +446,7 @@
 			BUG();
 	}
 
-	tmio_mmc_finish_request(host);
+	schedule_work(&host->done);
 }
 
 static void tmio_mmc_data_irq(struct tmio_mmc_host *host)
@@ -557,7 +536,7 @@
 				tasklet_schedule(&host->dma_issue);
 		}
 	} else {
-		tmio_mmc_finish_request(host);
+		schedule_work(&host->done);
 	}
 
 out:
@@ -567,6 +546,7 @@
 irqreturn_t tmio_mmc_irq(int irq, void *devid)
 {
 	struct tmio_mmc_host *host = devid;
+	struct mmc_host *mmc = host->mmc;
 	struct tmio_mmc_data *pdata = host->pdata;
 	unsigned int ireg, irq_mask, status;
 	unsigned int sdio_ireg, sdio_irq_mask, sdio_status;
@@ -588,13 +568,13 @@
 		if (sdio_ireg && !host->sdio_irq_enabled) {
 			pr_warning("tmio_mmc: Spurious SDIO IRQ, disabling! 0x%04x 0x%04x 0x%04x\n",
 				   sdio_status, sdio_irq_mask, sdio_ireg);
-			tmio_mmc_enable_sdio_irq(host->mmc, 0);
+			tmio_mmc_enable_sdio_irq(mmc, 0);
 			goto out;
 		}
 
-		if (host->mmc->caps & MMC_CAP_SDIO_IRQ &&
+		if (mmc->caps & MMC_CAP_SDIO_IRQ &&
 			sdio_ireg & TMIO_SDIO_STAT_IOIRQ)
-			mmc_signal_sdio_irq(host->mmc);
+			mmc_signal_sdio_irq(mmc);
 
 		if (sdio_ireg)
 			goto out;
@@ -603,58 +583,49 @@
 	pr_debug_status(status);
 	pr_debug_status(ireg);
 
-	if (!ireg) {
-		tmio_mmc_disable_mmc_irqs(host, status & ~irq_mask);
-
-		pr_warning("tmio_mmc: Spurious irq, disabling! "
-			"0x%08x 0x%08x 0x%08x\n", status, irq_mask, ireg);
-		pr_debug_status(status);
-
+	/* Card insert / remove attempts */
+	if (ireg & (TMIO_STAT_CARD_INSERT | TMIO_STAT_CARD_REMOVE)) {
+		tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_CARD_INSERT |
+			TMIO_STAT_CARD_REMOVE);
+		if ((((ireg & TMIO_STAT_CARD_REMOVE) && mmc->card) ||
+		     ((ireg & TMIO_STAT_CARD_INSERT) && !mmc->card)) &&
+		    !work_pending(&mmc->detect.work))
+			mmc_detect_change(host->mmc, msecs_to_jiffies(100));
 		goto out;
 	}
 
-	while (ireg) {
-		/* Card insert / remove attempts */
-		if (ireg & (TMIO_STAT_CARD_INSERT | TMIO_STAT_CARD_REMOVE)) {
-			tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_CARD_INSERT |
-				TMIO_STAT_CARD_REMOVE);
-			mmc_detect_change(host->mmc, msecs_to_jiffies(100));
-		}
-
-		/* CRC and other errors */
-/*		if (ireg & TMIO_STAT_ERR_IRQ)
- *			handled |= tmio_error_irq(host, irq, stat);
+	/* CRC and other errors */
+/*	if (ireg & TMIO_STAT_ERR_IRQ)
+ *		handled |= tmio_error_irq(host, irq, stat);
  */
 
-		/* Command completion */
-		if (ireg & (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT)) {
-			tmio_mmc_ack_mmc_irqs(host,
-				     TMIO_STAT_CMDRESPEND |
-				     TMIO_STAT_CMDTIMEOUT);
-			tmio_mmc_cmd_irq(host, status);
-		}
-
-		/* Data transfer */
-		if (ireg & (TMIO_STAT_RXRDY | TMIO_STAT_TXRQ)) {
-			tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_RXRDY | TMIO_STAT_TXRQ);
-			tmio_mmc_pio_irq(host);
-		}
-
-		/* Data transfer completion */
-		if (ireg & TMIO_STAT_DATAEND) {
-			tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_DATAEND);
-			tmio_mmc_data_irq(host);
-		}
-
-		/* Check status - keep going until we've handled it all */
-		status = sd_ctrl_read32(host, CTL_STATUS);
-		irq_mask = sd_ctrl_read32(host, CTL_IRQ_MASK);
-		ireg = status & TMIO_MASK_IRQ & ~irq_mask;
-
-		pr_debug("Status at end of loop: %08x\n", status);
-		pr_debug_status(status);
+	/* Command completion */
+	if (ireg & (TMIO_STAT_CMDRESPEND | TMIO_STAT_CMDTIMEOUT)) {
+		tmio_mmc_ack_mmc_irqs(host,
+			     TMIO_STAT_CMDRESPEND |
+			     TMIO_STAT_CMDTIMEOUT);
+		tmio_mmc_cmd_irq(host, status);
+		goto out;
 	}
-	pr_debug("MMC IRQ end\n");
+
+	/* Data transfer */
+	if (ireg & (TMIO_STAT_RXRDY | TMIO_STAT_TXRQ)) {
+		tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_RXRDY | TMIO_STAT_TXRQ);
+		tmio_mmc_pio_irq(host);
+		goto out;
+	}
+
+	/* Data transfer completion */
+	if (ireg & TMIO_STAT_DATAEND) {
+		tmio_mmc_ack_mmc_irqs(host, TMIO_STAT_DATAEND);
+		tmio_mmc_data_irq(host);
+		goto out;
+	}
+
+	pr_warning("tmio_mmc: Spurious irq, disabling! "
+		"0x%08x 0x%08x 0x%08x\n", status, irq_mask, ireg);
+	pr_debug_status(status);
+	tmio_mmc_disable_mmc_irqs(host, status & ~irq_mask);
 
 out:
 	return IRQ_HANDLED;
@@ -749,6 +720,8 @@
 	struct tmio_mmc_data *pdata = host->pdata;
 	unsigned long flags;
 
+	mutex_lock(&host->ios_lock);
+
 	spin_lock_irqsave(&host->lock, flags);
 	if (host->mrq) {
 		if (IS_ERR(host->mrq)) {
@@ -764,6 +737,8 @@
 				host->mrq->cmd->opcode, host->last_req_ts, jiffies);
 		}
 		spin_unlock_irqrestore(&host->lock, flags);
+
+		mutex_unlock(&host->ios_lock);
 		return;
 	}
 
@@ -771,33 +746,30 @@
 
 	spin_unlock_irqrestore(&host->lock, flags);
 
-	if (ios->clock)
-		tmio_mmc_set_clock(host, ios->clock);
-
-	/* Power sequence - OFF -> UP -> ON */
-	if (ios->power_mode == MMC_POWER_UP) {
-		if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) && !pdata->power) {
+	/*
+	 * pdata->power == false only if COLD_CD is available, otherwise only
+	 * in short time intervals during probing or resuming
+	 */
+	if (ios->power_mode == MMC_POWER_ON && ios->clock) {
+		if (!pdata->power) {
 			pm_runtime_get_sync(&host->pdev->dev);
 			pdata->power = true;
 		}
+		tmio_mmc_set_clock(host, ios->clock);
 		/* power up SD bus */
 		if (host->set_pwr)
 			host->set_pwr(host->pdev, 1);
-	} else if (ios->power_mode == MMC_POWER_OFF || !ios->clock) {
-		/* power down SD bus */
-		if (ios->power_mode == MMC_POWER_OFF) {
-			if (host->set_pwr)
-				host->set_pwr(host->pdev, 0);
-			if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) &&
-			    pdata->power) {
-				pdata->power = false;
-				pm_runtime_put(&host->pdev->dev);
-			}
-		}
-		tmio_mmc_clk_stop(host);
-	} else {
 		/* start bus clock */
 		tmio_mmc_clk_start(host);
+	} else if (ios->power_mode != MMC_POWER_UP) {
+		if (host->set_pwr)
+			host->set_pwr(host->pdev, 0);
+		if ((pdata->flags & TMIO_MMC_HAS_COLD_CD) &&
+		    pdata->power) {
+			pdata->power = false;
+			pm_runtime_put(&host->pdev->dev);
+		}
+		tmio_mmc_clk_stop(host);
 	}
 
 	switch (ios->bus_width) {
@@ -817,6 +789,8 @@
 			current->comm, task_pid_nr(current),
 			ios->clock, ios->power_mode);
 	host->mrq = NULL;
+
+	mutex_unlock(&host->ios_lock);
 }
 
 static int tmio_mmc_get_ro(struct mmc_host *mmc)
@@ -913,16 +887,20 @@
 		tmio_mmc_enable_sdio_irq(mmc, 0);
 
 	spin_lock_init(&_host->lock);
+	mutex_init(&_host->ios_lock);
 
 	/* Init delayed work for request timeouts */
 	INIT_DELAYED_WORK(&_host->delayed_reset_work, tmio_mmc_reset_work);
+	INIT_WORK(&_host->done, tmio_mmc_done_work);
 
 	/* See if we also get DMA */
 	tmio_mmc_request_dma(_host, pdata);
 
 	/* We have to keep the device powered for its card detection to work */
-	if (!(pdata->flags & TMIO_MMC_HAS_COLD_CD))
+	if (!(pdata->flags & TMIO_MMC_HAS_COLD_CD)) {
+		pdata->power = true;
 		pm_runtime_get_noresume(&pdev->dev);
+	}
 
 	mmc_add_host(mmc);
 
@@ -963,6 +941,7 @@
 		pm_runtime_get_sync(&pdev->dev);
 
 	mmc_remove_host(host->mmc);
+	cancel_work_sync(&host->done);
 	cancel_delayed_work_sync(&host->delayed_reset_work);
 	tmio_mmc_release_dma(host);
 
@@ -998,11 +977,16 @@
 	/* The MMC core will perform the complete set up */
 	host->pdata->power = false;
 
+	host->pm_global = true;
 	if (!host->pm_error)
 		pm_runtime_get_sync(dev);
 
-	tmio_mmc_reset(mmc_priv(mmc));
-	tmio_mmc_request_dma(host, host->pdata);
+	if (host->pm_global) {
+		/* Runtime PM resume callback didn't run */
+		tmio_mmc_reset(host);
+		tmio_mmc_enable_dma(host, true);
+		host->pm_global = false;
+	}
 
 	return mmc_resume_host(mmc);
 }
@@ -1023,12 +1007,15 @@
 	struct tmio_mmc_data *pdata = host->pdata;
 
 	tmio_mmc_reset(host);
+	tmio_mmc_enable_dma(host, true);
 
 	if (pdata->power) {
 		/* Only entered after a card-insert interrupt */
-		tmio_mmc_set_ios(mmc, &mmc->ios);
+		if (!mmc->card)
+			tmio_mmc_set_ios(mmc, &mmc->ios);
 		mmc_detect_change(mmc, msecs_to_jiffies(100));
 	}
+	host->pm_global = false;
 
 	return 0;
 }
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 02145e9..1196f61 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2758,6 +2758,29 @@
 
 	dev_notice(&dev->dev, "proprietary Ricoh MMC controller disabled (via firewire function)\n");
 	dev_notice(&dev->dev, "MMC cards are now supported by standard SDHCI controller\n");
+
+	/*
+	 * RICOH 0xe823 SD/MMC card reader fails to recognize
+	 * certain types of SD/MMC cards. Lowering the SD base
+	 * clock frequency from 200Mhz to 50Mhz fixes this issue.
+	 *
+	 * 0x150 - SD2.0 mode enable for changing base clock
+	 *	   frequency to 50Mhz
+	 * 0xe1  - Base clock frequency
+	 * 0x32  - 50Mhz new clock frequency
+	 * 0xf9  - Key register for 0x150
+	 * 0xfc  - key register for 0xe1
+	 */
+	if (dev->device == PCI_DEVICE_ID_RICOH_R5CE823) {
+		pci_write_config_byte(dev, 0xf9, 0xfc);
+		pci_write_config_byte(dev, 0x150, 0x10);
+		pci_write_config_byte(dev, 0xf9, 0x00);
+		pci_write_config_byte(dev, 0xfc, 0x01);
+		pci_write_config_byte(dev, 0xe1, 0x32);
+		pci_write_config_byte(dev, 0xfc, 0x00);
+
+		dev_notice(&dev->dev, "MMC controller base frequency changed to 50Mhz.\n");
+	}
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 5a90266..0dc9804 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -68,6 +68,11 @@
  * controller and report the event to the driver.
  */
 #define TMIO_MMC_HAS_COLD_CD		(1 << 3)
+/*
+ * Some controllers require waiting for the SD bus to become
+ * idle before writing to some registers.
+ */
+#define TMIO_MMC_HAS_IDLE_WAIT		(1 << 4)
 
 int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base);
 int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base);
@@ -80,6 +85,8 @@
 	int alignment_shift;
 };
 
+struct tmio_mmc_host;
+
 /*
  * data for the MMC controller
  */
@@ -94,6 +101,7 @@
 	void (*set_pwr)(struct platform_device *host, int state);
 	void (*set_clk_div)(struct platform_device *host, int state);
 	int (*get_cd)(struct platform_device *host);
+	int (*write16_hook)(struct tmio_mmc_host *host, int addr);
 };
 
 static inline void tmio_mmc_cd_wakeup(struct tmio_mmc_data *pdata)
diff --git a/include/linux/mmc/boot.h b/include/linux/mmc/boot.h
index 39d787c..23acc3b 100644
--- a/include/linux/mmc/boot.h
+++ b/include/linux/mmc/boot.h
@@ -1,7 +1,7 @@
-#ifndef MMC_BOOT_H
-#define MMC_BOOT_H
+#ifndef LINUX_MMC_BOOT_H
+#define LINUX_MMC_BOOT_H
 
 enum { MMC_PROGRESS_ENTER, MMC_PROGRESS_INIT,
        MMC_PROGRESS_LOAD, MMC_PROGRESS_DONE };
 
-#endif
+#endif /* LINUX_MMC_BOOT_H */
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 6ad4355..b460fc2 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -403,4 +403,4 @@
 extern void mmc_fixup_device(struct mmc_card *card,
 			     const struct mmc_fixup *table);
 
-#endif
+#endif /* LINUX_MMC_CARD_H */
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index b6718e5..b8b1b7a 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -117,6 +117,7 @@
 
 	unsigned int		sg_len;		/* size of scatter list */
 	struct scatterlist	*sg;		/* I/O scatter list */
+	s32			host_cookie;	/* host private data */
 };
 
 struct mmc_request {
@@ -125,13 +126,16 @@
 	struct mmc_data		*data;
 	struct mmc_command	*stop;
 
-	void			*done_data;	/* completion data */
+	struct completion	completion;
 	void			(*done)(struct mmc_request *);/* completion function */
 };
 
 struct mmc_host;
 struct mmc_card;
+struct mmc_async_req;
 
+extern struct mmc_async_req *mmc_start_req(struct mmc_host *,
+					   struct mmc_async_req *, int *);
 extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *);
 extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int);
 extern int mmc_app_cmd(struct mmc_host *, struct mmc_card *);
@@ -155,6 +159,7 @@
 extern int mmc_can_secure_erase_trim(struct mmc_card *card);
 extern int mmc_erase_group_aligned(struct mmc_card *card, unsigned int from,
 				   unsigned int nr);
+extern unsigned int mmc_calc_max_discard(struct mmc_card *card);
 
 extern int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen);
 
@@ -179,4 +184,4 @@
 
 extern u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max);
 
-#endif
+#endif /* LINUX_MMC_CORE_H */
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index bdd7cee..6b46819 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -11,8 +11,8 @@
  * (at your option) any later version.
  */
 
-#ifndef _LINUX_MMC_DW_MMC_H_
-#define _LINUX_MMC_DW_MMC_H_
+#ifndef LINUX_MMC_DW_MMC_H
+#define LINUX_MMC_DW_MMC_H
 
 #define MAX_MCI_SLOTS	2
 
@@ -48,6 +48,7 @@
  * @data: The data currently being transferred, or NULL if no data
  *	transfer is in progress.
  * @use_dma: Whether DMA channel is initialized or not.
+ * @using_dma: Whether DMA is in use for the current transfer.
  * @sg_dma: Bus address of DMA buffer.
  * @sg_cpu: Virtual address of DMA buffer.
  * @dma_ops: Pointer to platform-specific DMA callbacks.
@@ -74,7 +75,11 @@
  * @pdev: Platform device associated with the MMC controller.
  * @pdata: Platform data associated with the MMC controller.
  * @slot: Slots sharing this MMC controller.
+ * @fifo_depth: depth of FIFO.
  * @data_shift: log2 of FIFO item size.
+ * @part_buf_start: Start index in part_buf.
+ * @part_buf_count: Bytes of partial data in part_buf.
+ * @part_buf: Simple buffer for partial fifo reads/writes.
  * @push_data: Pointer to FIFO push function.
  * @pull_data: Pointer to FIFO pull function.
  * @quirks: Set of quirks that apply to specific versions of the IP.
@@ -117,6 +122,7 @@
 
 	/* DMA interface members*/
 	int			use_dma;
+	int			using_dma;
 
 	dma_addr_t		sg_dma;
 	void			*sg_cpu;
@@ -131,7 +137,7 @@
 	u32			stop_cmdr;
 	u32			dir_status;
 	struct tasklet_struct	tasklet;
-	struct tasklet_struct	card_tasklet;
+	struct work_struct	card_work;
 	unsigned long		pending_events;
 	unsigned long		completed_events;
 	enum dw_mci_state	state;
@@ -146,7 +152,15 @@
 	struct dw_mci_slot	*slot[MAX_MCI_SLOTS];
 
 	/* FIFO push and pull */
+	int			fifo_depth;
 	int			data_shift;
+	u8			part_buf_start;
+	u8			part_buf_count;
+	union {
+		u16		part_buf16;
+		u32		part_buf32;
+		u64		part_buf;
+	};
 	void (*push_data)(struct dw_mci *host, void *buf, int cnt);
 	void (*pull_data)(struct dw_mci *host, void *buf, int cnt);
 
@@ -196,6 +210,12 @@
 	unsigned int bus_hz; /* Bus speed */
 
 	unsigned int caps;	/* Capabilities */
+	/*
+	 * Override fifo depth. If 0, autodetect it from the FIFOTH register,
+	 * but note that this may not be reliable after a bootloader has used
+	 * it.
+	 */
+	unsigned int fifo_depth;
 
 	/* delay in mS before detecting cards after interrupt */
 	u32 detect_delay_ms;
@@ -219,4 +239,4 @@
 	struct block_settings *blk_settings;
 };
 
-#endif /* _LINUX_MMC_DW_MMC_H_ */
+#endif /* LINUX_MMC_DW_MMC_H */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 1ee4424..0f83858 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -106,6 +106,15 @@
 	 */
 	int (*enable)(struct mmc_host *host);
 	int (*disable)(struct mmc_host *host, int lazy);
+	/*
+	 * It is optional for the host to implement pre_req and post_req in
+	 * order to support double buffering of requests (prepare one
+	 * request while another request is active).
+	 */
+	void	(*post_req)(struct mmc_host *host, struct mmc_request *req,
+			    int err);
+	void	(*pre_req)(struct mmc_host *host, struct mmc_request *req,
+			   bool is_first_req);
 	void	(*request)(struct mmc_host *host, struct mmc_request *req);
 	/*
 	 * Avoid calling these three functions too often or in a "fast path",
@@ -139,11 +148,22 @@
 	int	(*start_signal_voltage_switch)(struct mmc_host *host, struct mmc_ios *ios);
 	int	(*execute_tuning)(struct mmc_host *host);
 	void	(*enable_preset_value)(struct mmc_host *host, bool enable);
+	int	(*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv);
 };
 
 struct mmc_card;
 struct device;
 
+struct mmc_async_req {
+	/* active mmc request */
+	struct mmc_request	*mrq;
+	/*
+	 * Check error status of completed mmc request.
+	 * Returns 0 if success otherwise non zero.
+	 */
+	int (*err_check) (struct mmc_card *, struct mmc_async_req *);
+};
+
 struct mmc_host {
 	struct device		*parent;
 	struct device		class_dev;
@@ -231,6 +251,7 @@
 	unsigned int		max_req_size;	/* maximum number of bytes in one req */
 	unsigned int		max_blk_size;	/* maximum size of one mmc block */
 	unsigned int		max_blk_count;	/* maximum number of blocks in one req */
+	unsigned int		max_discard_to;	/* max. discard timeout in ms */
 
 	/* private data */
 	spinlock_t		lock;		/* lock for claim and bus ops */
@@ -281,6 +302,8 @@
 
 	struct dentry		*debugfs_root;
 
+	struct mmc_async_req	*areq;		/* active async req */
+
 	unsigned long		private[0] ____cacheline_aligned;
 };
 
@@ -373,5 +396,4 @@
 {
 	return host->caps & MMC_CAP_CMD23;
 }
-#endif
-
+#endif /* LINUX_MMC_HOST_H */
diff --git a/include/linux/mmc/ioctl.h b/include/linux/mmc/ioctl.h
index 5baf298..8fa5bc5 100644
--- a/include/linux/mmc/ioctl.h
+++ b/include/linux/mmc/ioctl.h
@@ -51,4 +51,4 @@
  * block device operations.
  */
 #define MMC_IOC_MAX_BYTES  (512L * 256)
-#endif  /* LINUX_MMC_IOCTL_H */
+#endif /* LINUX_MMC_IOCTL_H */
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index ac26a68..5a794cb 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -21,8 +21,8 @@
  *          15 May 2002
  */
 
-#ifndef MMC_MMC_H
-#define MMC_MMC_H
+#ifndef LINUX_MMC_MMC_H
+#define LINUX_MMC_MMC_H
 
 /* Standard MMC commands (4.1)           type  argument     response */
    /* class 1 */
@@ -140,6 +140,16 @@
 #define R1_SWITCH_ERROR		(1 << 7)	/* sx, c */
 #define R1_APP_CMD		(1 << 5)	/* sr, c */
 
+#define R1_STATE_IDLE	0
+#define R1_STATE_READY	1
+#define R1_STATE_IDENT	2
+#define R1_STATE_STBY	3
+#define R1_STATE_TRAN	4
+#define R1_STATE_DATA	5
+#define R1_STATE_RCV	6
+#define R1_STATE_PRG	7
+#define R1_STATE_DIS	8
+
 /*
  * MMC/SD in SPI mode reports R1 status always, and R2 for SEND_STATUS
  * R1 is the low order byte; R2 is the next highest byte, when present.
@@ -327,5 +337,4 @@
 #define MMC_SWITCH_MODE_CLEAR_BITS	0x02	/* Clear bits which are 1 in value */
 #define MMC_SWITCH_MODE_WRITE_BYTE	0x03	/* Set target to value */
 
-#endif  /* MMC_MMC_PROTOCOL_H */
-
+#endif /* LINUX_MMC_MMC_H */
diff --git a/include/linux/mmc/pm.h b/include/linux/mmc/pm.h
index d37aac4..4a13920 100644
--- a/include/linux/mmc/pm.h
+++ b/include/linux/mmc/pm.h
@@ -27,4 +27,4 @@
 #define MMC_PM_KEEP_POWER	(1 << 0)	/* preserve card power during suspend */
 #define MMC_PM_WAKE_SDIO_IRQ	(1 << 1)	/* wake up host system on SDIO IRQ assertion */
 
-#endif
+#endif /* LINUX_MMC_PM_H */
diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h
index 7d35d52..1ebcf9b 100644
--- a/include/linux/mmc/sd.h
+++ b/include/linux/mmc/sd.h
@@ -9,8 +9,8 @@
  * your option) any later version.
  */
 
-#ifndef MMC_SD_H
-#define MMC_SD_H
+#ifndef LINUX_MMC_SD_H
+#define LINUX_MMC_SD_H
 
 /* SD commands                           type  argument     response */
   /* class 0 */
@@ -91,5 +91,4 @@
 #define SD_SWITCH_ACCESS_DEF	0
 #define SD_SWITCH_ACCESS_HS	1
 
-#endif
-
+#endif /* LINUX_MMC_SD_H */
diff --git a/include/linux/mmc/sdhci-pltfm.h b/include/linux/mmc/sdhci-pltfm.h
deleted file mode 100644
index 548d59d..0000000
--- a/include/linux/mmc/sdhci-pltfm.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Platform data declarations for the sdhci-pltfm driver.
- *
- * Copyright (c) 2010 MontaVista Software, LLC.
- *
- * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- */
-
-#ifndef _SDHCI_PLTFM_H
-#define _SDHCI_PLTFM_H
-
-struct sdhci_ops;
-struct sdhci_host;
-
-/**
- * struct sdhci_pltfm_data - SDHCI platform-specific information & hooks
- * @ops: optional pointer to the platform-provided SDHCI ops
- * @quirks: optional SDHCI quirks
- * @init: optional hook that is called during device probe, before the
- *        driver tries to access any SDHCI registers
- * @exit: optional hook that is called during device removal
- */
-struct sdhci_pltfm_data {
-	struct sdhci_ops *ops;
-	unsigned int quirks;
-	int (*init)(struct sdhci_host *host, struct sdhci_pltfm_data *pdata);
-	void (*exit)(struct sdhci_host *host);
-};
-
-#endif /* _SDHCI_PLTFM_H */
diff --git a/include/linux/mmc/sdhci-spear.h b/include/linux/mmc/sdhci-spear.h
index 9188c97..5cdc96d 100644
--- a/include/linux/mmc/sdhci-spear.h
+++ b/include/linux/mmc/sdhci-spear.h
@@ -11,8 +11,8 @@
  * warranty of any kind, whether express or implied.
  */
 
-#ifndef MMC_SDHCI_SPEAR_H
-#define MMC_SDHCI_SPEAR_H
+#ifndef LINUX_MMC_SDHCI_SPEAR_H
+#define LINUX_MMC_SDHCI_SPEAR_H
 
 #include <linux/platform_device.h>
 /*
@@ -39,4 +39,4 @@
 	pdev->dev.platform_data = data;
 }
 
-#endif /* MMC_SDHCI_SPEAR_H */
+#endif /* LINUX_MMC_SDHCI_SPEAR_H */
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 6a68c4e..5666f3a 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -8,8 +8,8 @@
  * the Free Software Foundation; either version 2 of the License, or (at
  * your option) any later version.
  */
-#ifndef __SDHCI_H
-#define __SDHCI_H
+#ifndef LINUX_MMC_SDHCI_H
+#define LINUX_MMC_SDHCI_H
 
 #include <linux/scatterlist.h>
 #include <linux/compiler.h>
@@ -162,4 +162,4 @@
 
 	unsigned long private[0] ____cacheline_aligned;
 };
-#endif /* __SDHCI_H */
+#endif /* LINUX_MMC_SDHCI_H */
diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h
index 245cdac..2a2e990 100644
--- a/include/linux/mmc/sdio.h
+++ b/include/linux/mmc/sdio.h
@@ -9,8 +9,8 @@
  * your option) any later version.
  */
 
-#ifndef MMC_SDIO_H
-#define MMC_SDIO_H
+#ifndef LINUX_MMC_SDIO_H
+#define LINUX_MMC_SDIO_H
 
 /* SDIO commands                         type  argument     response */
 #define SD_IO_SEND_OP_COND          5 /* bcr  [23:0] OCR         R4  */
@@ -161,5 +161,4 @@
 
 #define SDIO_FBR_BLKSIZE	0x10	/* block size (2 bytes) */
 
-#endif
-
+#endif /* LINUX_MMC_SDIO_H */
diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h
index 31baaf8..50f0bc9 100644
--- a/include/linux/mmc/sdio_func.h
+++ b/include/linux/mmc/sdio_func.h
@@ -9,8 +9,8 @@
  * your option) any later version.
  */
 
-#ifndef MMC_SDIO_FUNC_H
-#define MMC_SDIO_FUNC_H
+#ifndef LINUX_MMC_SDIO_FUNC_H
+#define LINUX_MMC_SDIO_FUNC_H
 
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
@@ -161,5 +161,4 @@
 extern mmc_pm_flag_t sdio_get_host_pm_caps(struct sdio_func *func);
 extern int sdio_set_host_pm_flags(struct sdio_func *func, mmc_pm_flag_t flags);
 
-#endif
-
+#endif /* LINUX_MMC_SDIO_FUNC_H */
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index a36ab3b..9f03fee 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -2,8 +2,8 @@
  * SDIO Classes, Interface Types, Manufacturer IDs, etc.
  */
 
-#ifndef MMC_SDIO_IDS_H
-#define MMC_SDIO_IDS_H
+#ifndef LINUX_MMC_SDIO_IDS_H
+#define LINUX_MMC_SDIO_IDS_H
 
 /*
  * Standard SDIO Function Interfaces
@@ -44,4 +44,4 @@
 #define SDIO_DEVICE_ID_SIANO_NOVA_A0		0x1100
 #define SDIO_DEVICE_ID_SIANO_STELLAR 		0x5347
 
-#endif
+#endif /* LINUX_MMC_SDIO_IDS_H */
diff --git a/include/linux/mmc/sh_mmcif.h b/include/linux/mmc/sh_mmcif.h
index 9eb9b4b..0222cd8 100644
--- a/include/linux/mmc/sh_mmcif.h
+++ b/include/linux/mmc/sh_mmcif.h
@@ -11,8 +11,8 @@
  *
  */
 
-#ifndef __SH_MMCIF_H__
-#define __SH_MMCIF_H__
+#ifndef LINUX_MMC_SH_MMCIF_H
+#define LINUX_MMC_SH_MMCIF_H
 
 #include <linux/io.h>
 #include <linux/platform_device.h>
@@ -220,4 +220,4 @@
 	sh_mmcif_boot_cmd(base, 0x03400040, 0x00010000);
 }
 
-#endif /* __SH_MMCIF_H__ */
+#endif /* LINUX_MMC_SH_MMCIF_H */
diff --git a/include/linux/mmc/sh_mobile_sdhi.h b/include/linux/mmc/sh_mobile_sdhi.h
index faf32b6..bd50b36 100644
--- a/include/linux/mmc/sh_mobile_sdhi.h
+++ b/include/linux/mmc/sh_mobile_sdhi.h
@@ -1,5 +1,5 @@
-#ifndef __SH_MOBILE_SDHI_H__
-#define __SH_MOBILE_SDHI_H__
+#ifndef LINUX_MMC_SH_MOBILE_SDHI_H
+#define LINUX_MMC_SH_MOBILE_SDHI_H
 
 #include <linux/types.h>
 
@@ -17,4 +17,4 @@
 	int (*get_cd)(struct platform_device *pdev);
 };
 
-#endif /* __SH_MOBILE_SDHI_H__ */
+#endif /* LINUX_MMC_SH_MOBILE_SDHI_H */
diff --git a/include/linux/mmc/tmio.h b/include/linux/mmc/tmio.h
index 19490b9..a1c1f32 100644
--- a/include/linux/mmc/tmio.h
+++ b/include/linux/mmc/tmio.h
@@ -12,8 +12,8 @@
  *
  * TC6393XB TC6391XB TC6387XB T7L66XB ASIC3
  */
-#ifndef _LINUX_MMC_TMIO_H_
-#define _LINUX_MMC_TMIO_H_
+#ifndef LINUX_MMC_TMIO_H
+#define LINUX_MMC_TMIO_H
 
 #define CTL_SD_CMD 0x00
 #define CTL_ARG_REG 0x04
@@ -21,6 +21,7 @@
 #define CTL_XFER_BLK_COUNT 0xa
 #define CTL_RESPONSE 0x0c
 #define CTL_STATUS 0x1c
+#define CTL_STATUS2 0x1e
 #define CTL_IRQ_MASK 0x20
 #define CTL_SD_CARD_CLK_CTL 0x24
 #define CTL_SD_XFER_LEN 0x26
@@ -30,6 +31,7 @@
 #define CTL_TRANSACTION_CTL 0x34
 #define CTL_SDIO_STATUS 0x36
 #define CTL_SDIO_IRQ_MASK 0x38
+#define CTL_DMA_ENABLE 0xd8
 #define CTL_RESET_SD 0xe0
 #define CTL_SDIO_REGS 0x100
 #define CTL_CLK_AND_WAIT_CTL 0x138
@@ -60,4 +62,4 @@
 
 #define TMIO_BBS		512		/* Boot block size */
 
-#endif /* _LINUX_MMC_TMIO_H_ */
+#endif /* LINUX_MMC_TMIO_H */
diff --git a/include/linux/platform_data/pxa_sdhci.h b/include/linux/platform_data/pxa_sdhci.h
new file mode 100644
index 0000000..51ad099
--- /dev/null
+++ b/include/linux/platform_data/pxa_sdhci.h
@@ -0,0 +1,60 @@
+/*
+ * include/linux/platform_data/pxa_sdhci.h
+ *
+ * Copyright 2010 Marvell
+ *	Zhangfei Gao <zhangfei.gao@marvell.com>
+ *
+ * PXA Platform - SDHCI platform data definitions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _PXA_SDHCI_H_
+#define _PXA_SDHCI_H_
+
+/* pxa specific flag */
+/* Require clock free running */
+#define PXA_FLAG_ENABLE_CLOCK_GATING (1<<0)
+/* card always wired to host, like on-chip emmc */
+#define PXA_FLAG_CARD_PERMANENT	(1<<1)
+/* Board design supports 8-bit data on SD/SDIO BUS */
+#define PXA_FLAG_SD_8_BIT_CAPABLE_SLOT (1<<2)
+
+/*
+ * struct pxa_sdhci_platdata() - Platform device data for PXA SDHCI
+ * @flags: flags for platform requirement
+ * @clk_delay_cycles:
+ *	mmp2: each step is roughly 100ps, 5bits width
+ *	pxa910: each step is 1ns, 4bits width
+ * @clk_delay_sel: select clk_delay, used on pxa910
+ *	0: choose feedback clk
+ *	1: choose feedback clk + delay value
+ *	2: choose internal clk
+ * @clk_delay_enable: enable clk_delay or not, used on pxa910
+ * @ext_cd_gpio: gpio pin used for external CD line
+ * @ext_cd_gpio_invert: invert values for external CD gpio line
+ * @max_speed: the maximum speed supported
+ * @host_caps: Standard MMC host capabilities bit field.
+ * @quirks: quirks of platfrom
+ * @pm_caps: pm_caps of platfrom
+ */
+struct sdhci_pxa_platdata {
+	unsigned int	flags;
+	unsigned int	clk_delay_cycles;
+	unsigned int	clk_delay_sel;
+	bool		clk_delay_enable;
+	unsigned int	ext_cd_gpio;
+	bool		ext_cd_gpio_invert;
+	unsigned int	max_speed;
+	unsigned int	host_caps;
+	unsigned int	quirks;
+	unsigned int	pm_caps;
+};
+
+struct sdhci_pxa {
+	u8	clk_enable;
+	u8	power_mode;
+};
+#endif /* _PXA_SDHCI_H_ */